Chapter 9: YOLO Model Training Practice
Haiyue
Learning Objectives
- Master the complete YOLO model training workflow
- Understand hyperparameter tuning strategies
- Learn training process monitoring and debugging techniques
- Become familiar with transfer learning and the use of pretrained models
9.1 Training Environment Preparation
from ultralytics import YOLO
import torch
import yaml
from pathlib import Path
import matplotlib.pyplot as plt
class TrainingSetup:
"""Training Environment Setup"""
def __init__(self):
self.device = self.get_device()
self.setup_reproducibility()
def get_device(self):
"""Get training device"""
if torch.cuda.is_available():
device = 'cuda'
print(f"Using GPU: {torch.cuda.get_device_name()}")
print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
else:
device = 'cpu'
print("Using CPU for training")
return device
def setup_reproducibility(self, seed=42):
"""Set random seed for reproducibility"""
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
print(f"Random seed set to {seed}")
def create_training_config(self, data_path, model_size='n'):
"""Create training configuration"""
config = {
'model': f'yolov8{model_size}.pt',
'data': data_path,
'epochs': 100,
'batch_size': 16,
'imgsz': 640,
'lr0': 0.01,
'lrf': 0.01,
'momentum': 0.937,
'weight_decay': 0.0005,
'warmup_epochs': 3.0,
'warmup_momentum': 0.8,
'warmup_bias_lr': 0.1,
'box': 7.5,
'cls': 0.5,
'dfl': 1.5,
            'pose': 12.0,  # pose loss gain (only used by pose models)
            'kobj': 2.0,   # keypoint objectness gain (only used by pose models)
'label_smoothing': 0.0,
'nbs': 64,
'overlap_mask': True,
'mask_ratio': 4,
'dropout': 0.0,
'val': True,
'plots': True,
'save': True,
'save_period': -1,
'cache': False,
'device': self.device,
'workers': 8,
'project': 'runs/train',
'name': 'exp',
'exist_ok': False,
'pretrained': True,
'optimizer': 'SGD',
'verbose': True,
'seed': 0,
'deterministic': True,
'single_cls': False,
'rect': False,
'cos_lr': False,
'close_mosaic': 10,
'resume': False,
'amp': True,
'fraction': 1.0,
'profile': False,
'freeze': None,
}
return config
# Initialize training setup
trainer = TrainingSetup()
training_config = trainer.create_training_config('data.yaml')
print("Training environment preparation complete")
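The configuration above references a dataset description file ('data.yaml'). For reference, here is a minimal sketch of the format Ultralytics YOLO expects; the paths and class names below are placeholders, so adapt them to your own dataset layout.
# Minimal sketch of a dataset description file (placeholder paths and classes)
dataset_config = {
    'path': 'datasets/my_dataset',    # dataset root directory (placeholder)
    'train': 'images/train',          # training images, relative to 'path'
    'val': 'images/val',              # validation images, relative to 'path'
    'names': {0: 'person', 1: 'car'}  # class index -> class name
}
if not Path('data.yaml').exists():
    with open('data.yaml', 'w') as f:
        yaml.safe_dump(dataset_config, f, sort_keys=False)
    print("Example data.yaml written")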
9.2 Basic Training Workflow
class YOLOTrainer:
"""YOLO Trainer"""
def __init__(self, config):
self.config = config
self.model = None
self.training_results = None
def load_model(self):
"""Load model"""
model_name = self.config.get('model', 'yolov8n.pt')
self.model = YOLO(model_name)
print(f"Model loaded: {model_name}")
return self.model
def start_training(self):
"""Start training"""
if self.model is None:
self.load_model()
print("Starting training...")
# Train model
self.training_results = self.model.train(
data=self.config['data'],
epochs=self.config['epochs'],
batch=self.config['batch_size'],
imgsz=self.config['imgsz'],
device=self.config['device'],
workers=self.config['workers'],
project=self.config['project'],
name=self.config['name'],
optimizer=self.config['optimizer'],
lr0=self.config['lr0'],
lrf=self.config['lrf'],
momentum=self.config['momentum'],
weight_decay=self.config['weight_decay'],
warmup_epochs=self.config['warmup_epochs'],
warmup_momentum=self.config['warmup_momentum'],
warmup_bias_lr=self.config['warmup_bias_lr'],
box=self.config['box'],
cls=self.config['cls'],
dfl=self.config['dfl'],
save=self.config['save'],
save_period=self.config['save_period'],
cache=self.config['cache'],
plots=self.config['plots'],
val=self.config['val'],
resume=self.config['resume'],
amp=self.config['amp'],
fraction=self.config['fraction'],
profile=self.config['profile'],
freeze=self.config['freeze'],
cos_lr=self.config['cos_lr'],
close_mosaic=self.config['close_mosaic'],
overlap_mask=self.config['overlap_mask'],
mask_ratio=self.config['mask_ratio'],
dropout=self.config['dropout'],
label_smoothing=self.config['label_smoothing'],
nbs=self.config['nbs'],
single_cls=self.config['single_cls'],
rect=self.config['rect'],
deterministic=self.config['deterministic'],
verbose=self.config['verbose']
)
print("Training complete!")
return self.training_results
def evaluate_model(self):
"""Evaluate model"""
if self.model is None:
print("Please load model first")
return None
print("Starting validation...")
validation_results = self.model.val(
data=self.config['data'],
imgsz=self.config['imgsz'],
batch=self.config['batch_size'],
device=self.config['device'],
plots=True,
verbose=True
)
print("Validation complete!")
return validation_results
def save_model(self, save_path):
"""Save model"""
if self.model is None:
print("No model to save")
return
self.model.save(save_path)
print(f"Model saved to: {save_path}")
def export_model(self, format='onnx', **kwargs):
"""Export model"""
if self.model is None:
print("Please load model first")
return
export_path = self.model.export(format=format, **kwargs)
print(f"Model exported: {export_path}")
return export_path
# Usage example (a separate name avoids shadowing the TrainingSetup instance above)
yolo_trainer = YOLOTrainer(training_config)
# results = yolo_trainer.start_training()
print("YOLO trainer initialized")
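Once a run has finished, the best and last checkpoints are written to the run directory (with the configuration above, typically runs/train/exp/weights/). A minimal sketch of reloading the best weights for inference, assuming that default output path:
# Reload the best checkpoint from a finished run (path assumes the default project/name above)
best_weights = Path('runs/train/exp/weights/best.pt')
if best_weights.exists():
    trained_model = YOLO(str(best_weights))
    # Run inference on a placeholder image with a confidence threshold
    predictions = trained_model.predict('sample.jpg', conf=0.25, imgsz=640)
    print(f"Detections in first image: {len(predictions[0].boxes)}")
else:
    print("No trained weights found yet - run training first")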
9.3 Hyperparameter Optimization
import optuna
from optuna.samplers import TPESampler
class HyperparameterOptimizer:
"""Hyperparameter Optimizer"""
def __init__(self, data_path, base_config):
self.data_path = data_path
self.base_config = base_config
self.study = None
def objective(self, trial):
"""Optimization objective function"""
# Define hyperparameter search space
lr0 = trial.suggest_float('lr0', 1e-5, 1e-1, log=True)
lrf = trial.suggest_float('lrf', 0.01, 1.0)
momentum = trial.suggest_float('momentum', 0.6, 0.98)
weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-2, log=True)
warmup_epochs = trial.suggest_float('warmup_epochs', 0.0, 5.0)
        box_loss_gain = trial.suggest_float('box', 2.0, 10.0)  # bracket the YOLOv8 default of 7.5
cls_loss_gain = trial.suggest_float('cls', 0.2, 4.0)
dfl_loss_gain = trial.suggest_float('dfl', 0.4, 6.0)
# Create configuration
config = self.base_config.copy()
config.update({
'lr0': lr0,
'lrf': lrf,
'momentum': momentum,
'weight_decay': weight_decay,
'warmup_epochs': warmup_epochs,
'box': box_loss_gain,
'cls': cls_loss_gain,
'dfl': dfl_loss_gain,
'epochs': 30, # Reduce epochs for faster optimization
'name': f'trial_{trial.number}',
'verbose': False
})
# Train model
        try:
            model = YOLO(config['model'])
            # 'model' is not a train() argument, and 'batch_size' must be passed as 'batch'
            train_args = {k: v for k, v in config.items() if k != 'model'}
            train_args['batch'] = train_args.pop('batch_size', 16)
            results = model.train(**train_args)
            # Return validation mAP50-95 as the optimization target
            return results.results_dict['metrics/mAP50-95(B)']
        except Exception as e:
            print(f"Trial {trial.number} failed: {e}")
            return 0.0
def optimize(self, n_trials=50):
"""Execute hyperparameter optimization"""
self.study = optuna.create_study(
direction='maximize',
sampler=TPESampler(seed=42)
)
print(f"Starting hyperparameter optimization with {n_trials} trials...")
self.study.optimize(self.objective, n_trials=n_trials)
print("\nOptimization complete!")
print(f"Best parameters: {self.study.best_params}")
print(f"Best score: {self.study.best_value:.4f}")
return self.study.best_params
def plot_optimization_history(self):
"""Visualize optimization history"""
if self.study is None:
print("Please run optimization first")
return
fig = optuna.visualization.plot_optimization_history(self.study)
fig.show()
def plot_parameter_importances(self):
"""Visualize parameter importance"""
if self.study is None:
print("Please run optimization first")
return
fig = optuna.visualization.plot_param_importances(self.study)
fig.show()
# Learning Rate Scheduling Strategies
class LearningRateScheduler:
"""Learning Rate Scheduler"""
@staticmethod
def cosine_annealing(epoch, total_epochs, lr0, lrf):
"""Cosine annealing"""
import math
return lrf + (lr0 - lrf) * (1 + math.cos(math.pi * epoch / total_epochs)) / 2
@staticmethod
def linear_decay(epoch, total_epochs, lr0, lrf):
"""Linear decay"""
return lr0 * (1 - epoch / total_epochs) + lrf * (epoch / total_epochs)
@staticmethod
def exponential_decay(epoch, total_epochs, lr0, lrf):
"""Exponential decay"""
import math
decay_rate = -math.log(lrf / lr0) / total_epochs
return lr0 * math.exp(-decay_rate * epoch)
@staticmethod
def step_decay(epoch, step_size=30, gamma=0.1, lr0=0.01):
"""Step decay"""
return lr0 * (gamma ** (epoch // step_size))
@staticmethod
def plot_schedules(total_epochs=100, lr0=0.01, lrf=0.001):
"""Visualize different scheduling strategies"""
epochs = list(range(total_epochs))
cosine_lrs = [LearningRateScheduler.cosine_annealing(e, total_epochs, lr0, lrf) for e in epochs]
linear_lrs = [LearningRateScheduler.linear_decay(e, total_epochs, lr0, lrf) for e in epochs]
exp_lrs = [LearningRateScheduler.exponential_decay(e, total_epochs, lr0, lrf) for e in epochs]
step_lrs = [LearningRateScheduler.step_decay(e, 30, 0.1, lr0) for e in epochs]
plt.figure(figsize=(12, 8))
plt.plot(epochs, cosine_lrs, label='Cosine Annealing', linewidth=2)
plt.plot(epochs, linear_lrs, label='Linear Decay', linewidth=2)
plt.plot(epochs, exp_lrs, label='Exponential Decay', linewidth=2)
plt.plot(epochs, step_lrs, label='Step Decay', linewidth=2)
plt.xlabel('Epoch')
plt.ylabel('Learning Rate')
plt.title('Learning Rate Schedules Comparison')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
# Visualize learning rate scheduling strategies
LearningRateScheduler.plot_schedules()
print("Hyperparameter optimization tools ready")
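A minimal usage sketch for the optimizer above; it is commented out because a full search is expensive, and in practice it should run on a small, representative subset with more trials than shown here.
# hp_optimizer = HyperparameterOptimizer('data.yaml', training_config)
# best_params = hp_optimizer.optimize(n_trials=20)
# hp_optimizer.plot_optimization_history()
# training_config.update(best_params)  # retrain with the best parameters found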
9.4 Training Monitoring and Debugging
import wandb
from torch.utils.tensorboard import SummaryWriter
import logging
from datetime import datetime
class TrainingMonitor:
"""Training Monitor"""
def __init__(self, project_name="yolo_training", use_wandb=True, use_tensorboard=True):
self.project_name = project_name
self.use_wandb = use_wandb
self.use_tensorboard = use_tensorboard
# Initialize monitoring tools
if self.use_wandb:
wandb.init(project=project_name)
if self.use_tensorboard:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
self.writer = SummaryWriter(f'runs/{project_name}_{timestamp}')
# Setup logging
self.setup_logging()
def setup_logging(self):
"""Setup logging"""
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler(f'training_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'),
logging.StreamHandler()
]
)
self.logger = logging.getLogger(__name__)
def log_metrics(self, metrics_dict, step):
"""Log training metrics"""
# Log to file
metrics_str = ", ".join([f"{k}: {v:.4f}" for k, v in metrics_dict.items()])
self.logger.info(f"Step {step} - {metrics_str}")
# W&B logging
if self.use_wandb:
wandb.log(metrics_dict, step=step)
# TensorBoard logging
if self.use_tensorboard:
for key, value in metrics_dict.items():
self.writer.add_scalar(key, value, step)
def log_learning_rate(self, lr, step):
"""Log learning rate"""
self.log_metrics({'learning_rate': lr}, step)
def log_loss_components(self, losses, step):
"""Log loss components"""
loss_dict = {
'train/box_loss': losses.get('train/box_loss', 0),
'train/cls_loss': losses.get('train/cls_loss', 0),
'train/dfl_loss': losses.get('train/dfl_loss', 0),
'val/box_loss': losses.get('val/box_loss', 0),
'val/cls_loss': losses.get('val/cls_loss', 0),
'val/dfl_loss': losses.get('val/dfl_loss', 0)
}
self.log_metrics(loss_dict, step)
def log_model_metrics(self, metrics, step):
"""Log model evaluation metrics"""
metric_dict = {
'metrics/precision': metrics.get('metrics/precision(B)', 0),
'metrics/recall': metrics.get('metrics/recall(B)', 0),
'metrics/mAP50': metrics.get('metrics/mAP50(B)', 0),
'metrics/mAP50-95': metrics.get('metrics/mAP50-95(B)', 0)
}
self.log_metrics(metric_dict, step)
def close(self):
"""Close monitor"""
if self.use_tensorboard:
self.writer.close()
if self.use_wandb:
wandb.finish()
class TrainingDebugger:
"""Training Debugging Tool"""
def __init__(self):
pass
def diagnose_training_issues(self, results_dict):
"""Diagnose training issues"""
issues = []
# Check loss trends
if 'train/box_loss' in results_dict:
box_loss = results_dict['train/box_loss']
if box_loss > 1.0:
issues.append("Bounding box loss too high, may need to adjust box loss weight or learning rate")
if 'train/cls_loss' in results_dict:
cls_loss = results_dict['train/cls_loss']
if cls_loss > 1.0:
issues.append("Classification loss too high, check if class labels are correct")
# Check mAP
if 'metrics/mAP50-95(B)' in results_dict:
mAP = results_dict['metrics/mAP50-95(B)']
if mAP < 0.1:
issues.append("mAP too low, check data quality or model configuration")
# Check precision and recall
if 'metrics/precision(B)' in results_dict and 'metrics/recall(B)' in results_dict:
precision = results_dict['metrics/precision(B)']
recall = results_dict['metrics/recall(B)']
if precision < 0.3:
issues.append("Low precision, may have too many false positives")
if recall < 0.3:
issues.append("Low recall, may be missing many targets")
return issues
def suggest_fixes(self, issues):
"""Suggest solutions"""
suggestions = {
"High Loss": [
"Lower learning rate",
"Check data annotation quality",
"Adjust loss weights",
"Increase warmup epochs"
],
"Low mAP": [
"Increase training epochs",
"Use larger model",
"Improve data augmentation",
"Check anchor settings"
],
"Low Precision": [
"Increase confidence threshold",
"Improve NMS settings",
"Add negative samples"
],
"Low Recall": [
"Lower confidence threshold",
"Increase data augmentation",
"Use multi-scale training"
]
}
print("Training Issue Diagnosis:")
print("=" * 40)
if not issues:
print("No obvious issues found")
return
for issue in issues:
print(f"Issue: {issue}")
            # Match suggestions (case-insensitive; every keyword in the category must appear)
            for category, suggestions_list in suggestions.items():
                if all(keyword.lower() in issue.lower() for keyword in category.split()):
print("Suggested Solutions:")
for suggestion in suggestions_list:
print(f" • {suggestion}")
break
print()
# Training Health Check
class TrainingHealthCheck:
"""Training Health Check"""
def __init__(self):
self.loss_history = []
self.metric_history = []
def check_convergence(self, loss_values, window_size=10):
"""Check convergence status"""
if len(loss_values) < window_size * 2:
return "Insufficient data"
recent_losses = loss_values[-window_size:]
previous_losses = loss_values[-window_size*2:-window_size]
recent_avg = sum(recent_losses) / len(recent_losses)
previous_avg = sum(previous_losses) / len(previous_losses)
        improvement = (previous_avg - recent_avg) / max(previous_avg, 1e-8)  # avoid division by zero
if improvement > 0.05:
return "Converging"
elif improvement > -0.02:
return "Slow convergence"
else:
return "Possibly diverging"
def detect_overfitting(self, train_loss, val_loss, threshold=0.1):
"""Detect overfitting"""
if len(train_loss) < 10 or len(val_loss) < 10:
return "Insufficient data"
train_trend = (train_loss[-1] - train_loss[-10]) / 10
val_trend = (val_loss[-1] - val_loss[-10]) / 10
if train_trend < -0.01 and val_trend > 0.01:
return "Possible overfitting"
elif abs(train_trend) < 0.001 and abs(val_trend) < 0.001:
return "Training stable"
else:
return "Normal training"
def check_learning_rate(self, loss_values, lr_values):
"""Check if learning rate is appropriate"""
if len(loss_values) < 5:
return "Insufficient data"
recent_loss_change = (loss_values[-1] - loss_values[-5]) / 5
current_lr = lr_values[-1] if lr_values else 0.01
if recent_loss_change > 0.01:
return f"Learning rate may be too high (current: {current_lr:.6f})"
elif abs(recent_loss_change) < 0.0001:
return f"Learning rate may be too low (current: {current_lr:.6f})"
else:
return f"Learning rate appropriate (current: {current_lr:.6f})"
print("Training monitoring and debugging tools ready")
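To connect these checks to a real run, the per-epoch metrics that Ultralytics writes to results.csv in the run directory can be read back and fed into TrainingHealthCheck. A sketch assuming the default output location and that pandas is installed; some versions pad the CSV column names with spaces, so they are stripped first.
import pandas as pd
def run_health_check(results_csv='runs/train/exp/results.csv'):
    """Read per-epoch metrics from a run and report convergence and overfitting status"""
    df = pd.read_csv(results_csv)
    df.columns = df.columns.str.strip()  # some versions pad column names with spaces
    health = TrainingHealthCheck()
    train_loss = df['train/box_loss'].tolist()
    val_loss = df['val/box_loss'].tolist()
    print("Convergence:", health.check_convergence(train_loss))
    print("Overfitting:", health.detect_overfitting(train_loss, val_loss))
# run_health_check()  # uncomment after a training run has produced results.csv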
9.5 Transfer Learning Strategies
class TransferLearningManager:
"""Transfer Learning Manager"""
def __init__(self):
self.pretrained_models = {
'yolov8n': 'yolov8n.pt',
'yolov8s': 'yolov8s.pt',
'yolov8m': 'yolov8m.pt',
'yolov8l': 'yolov8l.pt',
'yolov8x': 'yolov8x.pt'
}
def select_pretrained_model(self, target_classes, data_size, compute_budget):
"""Select appropriate pretrained model"""
recommendations = []
if data_size < 1000:
if compute_budget == 'low':
recommendations.append('yolov8n - Small dataset, limited compute resources')
else:
recommendations.append('yolov8s - Small dataset, but larger model can improve performance')
elif data_size < 10000:
if compute_budget == 'low':
recommendations.append('yolov8s - Medium dataset, reasonable model size')
elif compute_budget == 'medium':
recommendations.append('yolov8m - Balanced performance and efficiency')
else:
recommendations.append('yolov8l - Sufficient data, can use large model')
else: # data_size >= 10000
if compute_budget == 'low':
recommendations.append('yolov8s - Large dataset, but compute limited')
elif compute_budget == 'medium':
recommendations.append('yolov8m - Large dataset, medium model')
elif compute_budget == 'high':
recommendations.append('yolov8l - Large dataset, large model')
else:
recommendations.append('yolov8x - Maximum performance requirement')
# Adjust recommendations based on class count
if target_classes > 80:
recommendations.append('Recommendation: Many classes, consider using larger model')
elif target_classes < 10:
recommendations.append('Recommendation: Few classes, can use smaller model')
return recommendations
def create_transfer_config(self, pretrained_model, freeze_layers=None):
"""Create transfer learning configuration"""
config = {
'model': pretrained_model,
'pretrained': True,
'freeze': freeze_layers, # Number of layers to freeze, None means no freezing
}
# Adjust learning rate based on freezing
if freeze_layers:
config.update({
'lr0': 0.001, # Lower learning rate
'warmup_epochs': 1.0,
})
else:
config.update({
'lr0': 0.01, # Standard learning rate
'warmup_epochs': 3.0,
})
return config
def gradual_unfreezing_schedule(self, total_epochs):
"""Gradual unfreezing schedule"""
schedule = []
# Stage 1: Freeze backbone
schedule.append({
'epochs': total_epochs // 4,
'freeze': 10, # Freeze first 10 layers
'lr': 0.001,
'description': 'Freeze backbone, train detection head'
})
# Stage 2: Partial unfreezing
schedule.append({
'epochs': total_epochs // 4,
'freeze': 5, # Freeze only first 5 layers
'lr': 0.0005,
'description': 'Partial unfreezing, fine-tune later layers'
})
# Stage 3: Complete unfreezing
schedule.append({
'epochs': total_epochs // 2,
'freeze': None,
'lr': 0.0001,
'description': 'Complete unfreezing, end-to-end fine-tuning'
})
return schedule
def domain_adaptation_config(self, source_domain, target_domain):
"""Domain adaptation configuration"""
adaptation_strategies = {
('general', 'medical'): {
'data_augmentation': ['contrast', 'brightness', 'gaussian_blur'],
'loss_weights': {'box': 7.5, 'cls': 1.0, 'dfl': 1.5},
'learning_rate': 0.001,
'freeze_backbone': True
},
('general', 'industrial'): {
'data_augmentation': ['rotation', 'scale', 'noise'],
'loss_weights': {'box': 10.0, 'cls': 0.5, 'dfl': 2.0},
'learning_rate': 0.005,
'freeze_backbone': False
},
('general', 'aerial'): {
'data_augmentation': ['rotation', 'scale', 'flip'],
'loss_weights': {'box': 5.0, 'cls': 1.5, 'dfl': 1.0},
'learning_rate': 0.01,
'freeze_backbone': False
}
}
key = (source_domain, target_domain)
if key in adaptation_strategies:
return adaptation_strategies[key]
else:
# Default configuration
return {
'data_augmentation': ['horizontal_flip', 'scale', 'brightness'],
'loss_weights': {'box': 7.5, 'cls': 0.5, 'dfl': 1.5},
'learning_rate': 0.01,
'freeze_backbone': False
}
class FineTuningStrategies:
"""Fine-tuning Strategies"""
@staticmethod
def discriminative_learning_rates(base_lr=0.01, backbone_ratio=0.1, neck_ratio=0.5):
"""Discriminative learning rate strategy"""
return {
'backbone_lr': base_lr * backbone_ratio,
'neck_lr': base_lr * neck_ratio,
'head_lr': base_lr,
'description': 'Backbone uses smaller learning rate, detection head uses larger learning rate'
}
@staticmethod
def layer_wise_decay(base_lr=0.01, decay_rate=0.9, num_layers=24):
"""Layer-wise decay learning rate"""
layer_lrs = []
for i in range(num_layers):
lr = base_lr * (decay_rate ** (num_layers - i - 1))
layer_lrs.append(lr)
return {
'layer_learning_rates': layer_lrs,
'description': 'Deeper layers use larger learning rate, shallower layers use smaller learning rate'
}
@staticmethod
def cosine_restart_schedule(T_0=10, T_mult=2, eta_min=1e-6, eta_max=1e-2):
"""Cosine restart scheduling"""
return {
'scheduler': 'cosine_restart',
'T_0': T_0,
'T_mult': T_mult,
'eta_min': eta_min,
'eta_max': eta_max,
'description': 'Periodic learning rate restart to avoid local optima'
}
# Usage example
tl_manager = TransferLearningManager()
# Get model recommendations
recommendations = tl_manager.select_pretrained_model(
target_classes=20,
data_size=5000,
compute_budget='medium'
)
print("Transfer Learning Model Recommendations:")
for rec in recommendations:
print(f" • {rec}")
# Create transfer learning configuration
transfer_config = tl_manager.create_transfer_config('yolov8m.pt', freeze_layers=5)
print(f"\nTransfer Learning Configuration: {transfer_config}")
# Gradual unfreezing schedule
schedule = tl_manager.gradual_unfreezing_schedule(total_epochs=100)
print(f"\nGradual Unfreezing Schedule:")
for i, stage in enumerate(schedule, 1):
print(f" Stage {i}: {stage['description']}")
print(f" Epochs: {stage['epochs']}, Freeze: {stage['freeze']}, LR: {stage['lr']}")
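The schedule above can be driven by successive train() calls, carrying the weights of each stage into the next and unfreezing more layers each time. A rough sketch, assuming the project/name layout used earlier in this chapter:
def run_gradual_unfreezing(data_path, schedule, base_model='yolov8m.pt'):
    """Sketch: train in stages, unfreezing more layers and lowering the learning rate each time"""
    weights = base_model
    for i, stage in enumerate(schedule, 1):
        print(f"Stage {i}: {stage['description']}")
        model = YOLO(weights)
        model.train(
            data=data_path,
            epochs=stage['epochs'],
            lr0=stage['lr'],
            freeze=stage['freeze'],  # number of leading layers to freeze (None = train all)
            project='runs/train',
            name=f'unfreeze_stage{i}',
            exist_ok=True
        )
        # Carry this stage's best weights into the next stage
        weights = f'runs/train/unfreeze_stage{i}/weights/best.pt'
    return weights
# final_weights = run_gradual_unfreezing('data.yaml', schedule)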
9.6 Advanced Training Techniques
class AdvancedTrainingTechniques:
"""Advanced Training Techniques"""
def __init__(self):
pass
def mixed_precision_training(self):
"""Mixed precision training configuration"""
return {
'amp': True, # Enable automatic mixed precision
'description': 'Use FP16 and FP32 mixed precision, accelerate training and save memory',
'benefits': [
'1.5-2x training speed improvement',
'Approximately 50% memory reduction',
'Almost no accuracy loss'
],
'requirements': [
'GPU supports Tensor Cores (V100, RTX series)',
'PyTorch 1.6+',
'CUDA 10.2+'
]
}
def exponential_moving_average(self, decay=0.9999):
"""Exponential moving average configuration"""
return {
'ema_decay': decay,
'description': 'Use EMA to smooth model weights, improve generalization',
'implementation': '''
# EMA update formula
ema_weights = decay * ema_weights + (1 - decay) * current_weights
''',
'benefits': [
'Reduce model weight oscillation',
'Improve validation performance',
'More stable convergence'
]
}
def gradient_clipping(self, max_norm=10.0):
"""Gradient clipping configuration"""
return {
'max_norm': max_norm,
'description': 'Limit gradient norm to prevent gradient explosion',
'when_to_use': [
'Gradient norm often >10',
'Loss becomes NaN or Inf',
'Training unstable'
]
}
def knowledge_distillation_setup(self, teacher_model, temperature=4.0, alpha=0.7):
"""Knowledge distillation setup"""
return {
'teacher_model': teacher_model,
'temperature': temperature,
'alpha': alpha,
'description': 'Use large model to guide small model training',
'loss_function': '''
distillation_loss = alpha * KL_div(student_soft, teacher_soft) +
(1 - alpha) * cross_entropy(student, targets)
where:
student_soft = softmax(student_logits / temperature)
teacher_soft = softmax(teacher_logits / temperature)
'''
}
def multi_scale_training(self, scales=[320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640]):
"""Multi-scale training"""
return {
'scales': scales,
'description': 'Randomly select input scale to improve multi-scale generalization',
'strategy': {
'random_scale': 'Randomly select a scale for each batch',
'scheduled_scale': 'Change scale according to schedule',
'progressive_scale': 'Gradually increase from small to large scale'
}
}
def label_smoothing(self, smoothing=0.1):
"""Label smoothing"""
return {
'label_smoothing': smoothing,
'description': 'Soften one-hot labels to improve generalization',
'formula': f'y_smooth = (1 - {smoothing}) * y_hot + {smoothing} / num_classes',
'benefits': [
'Reduce overfitting',
'Improve model calibration',
'Enhance generalization'
]
}
def focal_loss_config(self, alpha=0.25, gamma=2.0):
"""Focal loss configuration"""
return {
'alpha': alpha,
'gamma': gamma,
'description': 'Address class imbalance and hard sample problems',
'formula': 'FL = -α(1-p)^γ * log(p)',
'use_cases': [
'Severe class imbalance',
'Many easy negative samples',
'Need to focus on hard samples'
]
}
class TrainingRecipes:
"""Training Recipe Collection"""
@staticmethod
def small_dataset_recipe(epochs=200):
"""Small dataset training recipe"""
return {
'name': 'Small Dataset Training Recipe',
'epochs': epochs,
'model': 'yolov8n.pt',
'batch_size': 32,
'lr0': 0.001,
'lrf': 0.01,
'warmup_epochs': 5.0,
'label_smoothing': 0.1,
'mixup': 0.5,
'copy_paste': 0.3,
'mosaic': 0.8,
'freeze': 10, # Freeze backbone
'data_augmentation': 'aggressive',
'description': 'Suitable for datasets with <1000 images'
}
@staticmethod
def large_dataset_recipe(epochs=100):
"""Large dataset training recipe"""
return {
'name': 'Large Dataset Training Recipe',
'epochs': epochs,
'model': 'yolov8l.pt',
'batch_size': 16,
'lr0': 0.01,
'lrf': 0.001,
'warmup_epochs': 3.0,
'label_smoothing': 0.0,
'mixup': 0.0,
'mosaic': 1.0,
'freeze': None, # No freezing
'amp': True,
'description': 'Suitable for datasets with >10000 images'
}
@staticmethod
def production_ready_recipe(epochs=150):
"""Production environment training recipe"""
return {
'name': 'Production Environment Training Recipe',
'epochs': epochs,
'model': 'yolov8m.pt',
'batch_size': 16,
'lr0': 0.01,
'lrf': 0.01,
'warmup_epochs': 3.0,
'cos_lr': True,
'label_smoothing': 0.05,
'amp': True,
'ema_decay': 0.9999,
'save_period': 10,
'val': True,
'plots': True,
'deterministic': True,
'description': 'Recommended production configuration, balanced speed and accuracy'
}
@staticmethod
def quick_experiment_recipe(epochs=50):
"""Quick experiment recipe"""
return {
'name': 'Quick Experiment Recipe',
'epochs': epochs,
'model': 'yolov8n.pt',
'batch_size': 64,
'lr0': 0.01,
'imgsz': 416, # Smaller input size
'cache': 'ram', # Cache to memory
'workers': 8,
'amp': True,
'val': False, # Skip validation to accelerate training
'plots': False,
'description': 'Quick validation of ideas, suitable for hyperparameter search'
}
# Usage example
advanced_techniques = AdvancedTrainingTechniques()
recipes = TrainingRecipes()
print("Advanced Training Techniques:")
print("=" * 40)
# Mixed precision training
mp_config = advanced_techniques.mixed_precision_training()
print(f"\nMixed Precision Training: {mp_config['description']}")
for benefit in mp_config['benefits']:
print(f" • {benefit}")
# Exponential moving average
ema_config = advanced_techniques.exponential_moving_average()
print(f"\nEMA: {ema_config['description']}")
# Training recipes
print(f"\nTraining Recipe Examples:")
small_recipe = recipes.small_dataset_recipe()
print(f" {small_recipe['name']}: {small_recipe['description']}")
print("\nAdvanced training techniques ready")
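As a concrete illustration of the EMA update formula quoted earlier, here is a minimal PyTorch sketch; it is generic and not tied to the Ultralytics trainer, which already maintains its own EMA during training.
import copy
import torch
import torch.nn as nn
class SimpleEMA:
    """Keep an exponential moving average of a model's parameters (illustrative sketch)"""
    def __init__(self, model: nn.Module, decay: float = 0.9999):
        self.decay = decay
        self.ema_model = copy.deepcopy(model).eval()  # shadow copy holds the averaged weights
        for p in self.ema_model.parameters():
            p.requires_grad_(False)
    @torch.no_grad()
    def update(self, model: nn.Module):
        # ema_weights = decay * ema_weights + (1 - decay) * current_weights
        for ema_p, p in zip(self.ema_model.parameters(), model.parameters()):
            ema_p.mul_(self.decay).add_(p.detach(), alpha=1.0 - self.decay)
# Usage: call ema.update(model) after each optimizer step, then evaluate or export ema.ema_model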
9.7 Chapter Summary
After completing this chapter, you should be able to:
- Configure and start YOLO model training
- Understand and tune key hyperparameters
- Implement effective training monitoring
- Apply transfer learning strategies
- Use advanced training techniques
- Diagnose and solve training issues
- Select appropriate training recipes
def training_checklist():
"""Training Checklist"""
checklist = {
"Pre-training Preparation": [
"□ Dataset format correct",
"□ Data quality check passed",
"□ Train/validation split reasonable",
"□ Hardware environment confirmed",
"□ Dependency library versions compatible"
],
"Configuration Setup": [
"□ Model size selection appropriate",
"□ Batch size and learning rate matched",
"□ Data augmentation strategy determined",
"□ Loss weights adjusted",
"□ Monitoring tools configured"
],
"Training Process": [
"□ Learning rate schedule reasonable",
"□ Loss decreasing normally",
"□ Validation metrics improving",
"□ No overfitting signs",
"□ Regular checkpoint saving"
],
"Training Completion": [
"□ Model performance meets requirements",
"□ Best weights saved",
"□ Training logs complete",
"□ Model exported to deployment format",
"□ Experimental results recorded"
]
}
print("YOLO Training Checklist:")
print("=" * 40)
for category, items in checklist.items():
print(f"\n{category}:")
for item in items:
print(f" {item}")
print("\nKey Factors for Training Success:")
success_factors = [
"High-quality annotated data",
"Appropriate model size",
"Proper hyperparameter settings",
"Sufficient training time",
"Continuous monitoring and tuning"
]
for factor in success_factors:
print(f" • {factor}")
training_checklist()
print("\nYOLO Model Training Practice Complete!")
Key Points: Master the complete YOLO training workflow, from environment setup through advanced training techniques, and build systematic training and tuning skills.