.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    import mindspore
    from mindspore import nn, ops, value_and_grad
    from d2l import mindspore as d2l

    # LeNet: two convolution / sigmoid / average-pooling stages followed by
    # a three-layer dense head. Every layer with weights is initialized
    # with the 'xavier_uniform' scheme.
    net = nn.SequentialCell([
        # Stage 1: 1 -> 6 channels, 5x5 kernel, explicit padding of 2
        nn.Conv2d(1, 6, kernel_size=5, padding=2, pad_mode='pad',
                  weight_init='xavier_uniform'),
        nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        # Stage 2: 6 -> 16 channels, 5x5 kernel, no padding
        nn.Conv2d(6, 16, kernel_size=5, pad_mode='valid',
                  weight_init='xavier_uniform'),
        nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        # Dense head: flatten 16 * 5 * 5 features -> 120 -> 84 -> 10 outputs
        nn.Flatten(),
        nn.Dense(16 * 5 * 5, 120, weight_init='xavier_uniform'),
        nn.Sigmoid(),
        nn.Dense(120, 84, weight_init='xavier_uniform'),
        nn.Sigmoid(),
        nn.Dense(84, 10, weight_init='xavier_uniform'),
    ])

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    import torch
    from torch import nn
    from d2l import torch as d2l

    # LeNet: two convolution / sigmoid / average-pooling stages followed by
    # a three-layer fully connected head.
    net = nn.Sequential(
        # Stage 1: 1 -> 6 channels, 5x5 kernel, padding of 2
        nn.Conv2d(1, 6, kernel_size=5, padding=2),
        nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        # Stage 2: 6 -> 16 channels, 5x5 kernel, no padding
        nn.Conv2d(6, 16, kernel_size=5),
        nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        # Head: flatten 16 * 5 * 5 features -> 120 -> 84 -> 10 outputs
        nn.Flatten(),
        nn.Linear(16 * 5 * 5, 120),
        nn.Sigmoid(),
        nn.Linear(120, 84),
        nn.Sigmoid(),
        nn.Linear(84, 10),
    )

.. raw:: html

.. raw:: html

mindspore pytorch

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    # Feed one random single-channel 28x28 sample through the network,
    # layer by layer, and report the shape each layer produces.
    X = ops.randn(1, 1, 28, 28)
    for layer in net:
        X = layer(X)
        print(type(layer).__name__, 'output shape:\t', X.shape)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
    :class: output

    Conv2d output shape: (1, 6, 28, 28)
    Sigmoid output shape: (1, 6, 28, 28)
    AvgPool2d output shape: (1, 6, 14, 14)
    Conv2d output shape: (1, 16, 10, 10)
    Sigmoid output shape: (1, 16, 10, 10)
    AvgPool2d output shape: (1, 16, 5, 5)
    Flatten output shape: (1, 400)
    Dense output shape: (1, 120)
    Sigmoid output shape: (1, 120)
    Dense output shape: (1, 84)
    Sigmoid output shape: (1, 84)
    Dense output shape: (1, 10)

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    # Feed one random single-channel 28x28 sample through the network,
    # layer by layer, and report the shape each layer produces.
    X = torch.rand(size=(1, 1, 28, 28), dtype=torch.float32)
    for layer in net:
        X = layer(X)
        print(type(layer).__name__, 'output shape: \t', X.shape)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
    :class: output

    Conv2d output shape: torch.Size([1, 6, 28, 28])
    Sigmoid output shape: torch.Size([1, 6, 28, 28])
    AvgPool2d output shape: torch.Size([1, 6, 14, 14])
    Conv2d output shape: torch.Size([1, 16, 10, 10])
    Sigmoid output shape: torch.Size([1, 16, 10, 10])
    AvgPool2d output shape: torch.Size([1, 16, 5, 5])
    Flatten output shape: torch.Size([1, 400])
    Linear output shape: torch.Size([1, 120])
    Sigmoid output shape: torch.Size([1, 120])
    Linear output shape: torch.Size([1, 84])
    Sigmoid output shape: torch.Size([1, 84])
    Linear output shape: torch.Size([1, 10])

.. raw:: html

.. raw:: html

mindspore pytorch

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    def evaluate_accuracy_gpu(net, dataset, device=None):
        """Compute the accuracy of a model on a dataset using a GPU.

        Args:
            net: Network to evaluate; it is switched out of training mode.
            dataset: Dataset exposing ``create_tuple_iterator()`` that
                yields ``(X, y)`` batches.
            device: Accepted but not used by this implementation.

        Returns:
            The first accumulated total (sum of ``d2l.accuracy`` over all
            batches) divided by the second (sum of ``y.size``).
        """
        net.set_train(False)
        # Two running totals: the d2l.accuracy results and the label counts.
        totals = d2l.Accumulator(2)
        for features, labels in dataset.create_tuple_iterator():
            predictions = net(features)
            totals.add(d2l.accuracy(predictions, labels), labels.size)
        return totals[0] / totals[1]

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    def evaluate_accuracy_gpu(net, data_iter, device=None): #@save
        """Compute the accuracy of a model on a dataset using a GPU.

        Args:
            net: Model to evaluate. If it is an ``nn.Module`` it is put in
                evaluation mode.
            data_iter: Iterable yielding ``(X, y)`` batches.
            device: Device the batches are moved to. When falsy and ``net``
                is an ``nn.Module``, the device of the first model
                parameter is used.

        Returns:
            Number of correct predictions divided by the total number of
            predictions.
        """
        is_module = isinstance(net, nn.Module)
        if is_module:
            net.eval()  # Switch to evaluation mode
        if is_module and not device:
            device = next(iter(net.parameters())).device
        # No. of correct predictions, no. of predictions
        totals = d2l.Accumulator(2)
        with torch.no_grad():
            for X, y in data_iter:
                # List inputs are required for BERT fine-tuning (to be
                # covered later); move each element separately.
                X = ([x.to(device) for x in X] if isinstance(X, list)
                     else X.to(device))
                y = y.to(device)
                totals.add(d2l.accuracy(net(X), y), y.numel())
        return totals[0] / totals[1]

为了使用GPU，我们还需要一点小改动。与 :numref:`sec_softmax_scratch`\ 中定义的\ ``train_epoch_ch3``\ 不同，在进行正向和反向传播之前，我们需要将每一小批量数据移动到我们指定的设备（例如GPU）上。

.. raw:: html

.. raw:: html

mindspore pytorch

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    #@save
    def train_ch6(net, train_dataset, test_dataset, num_epochs, lr):
        """Train a model with a GPU (defined in Chapter 6).

        Runs SGD with a sparse softmax cross-entropy loss, plots the running
        training loss / training accuracy several times per epoch and the
        test accuracy once per epoch, then prints the final metrics and the
        throughput.

        Args:
            net: Network to train; must expose ``trainable_params()`` and
                ``set_train()``.
            train_dataset: Training dataset exposing ``get_dataset_size()``
                and ``create_tuple_iterator()``.
            test_dataset: Dataset passed to ``evaluate_accuracy_gpu``.
            num_epochs: Number of passes over ``train_dataset``.
            lr: Learning rate for SGD.
        """
        optim = nn.SGD(net.trainable_params(), learning_rate=lr)
        loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

        # Forward pass: returns the loss plus the predictions as auxiliary output
        def forward_fn(x, y):
            y_hat = net(x)
            loss = loss_fn(y_hat, y)
            return loss, y_hat

        grad_fn = value_and_grad(forward_fn, None,
                                 weights=net.trainable_params(), has_aux=True)

        # Single training step
        def train(X, Y, optim):
            (loss, y_hat), grads = grad_fn(X, Y)
            # Make the returned loss depend on the optimizer update
            loss = ops.depend(loss, optim(grads))
            return loss, y_hat

        animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs],
                                legend=['train loss', 'train acc', 'test acc'])
        timer, num_batches = d2l.Timer(), train_dataset.get_dataset_size()
        # Plot roughly five times per epoch. Clamp the interval to at least 1:
        # with fewer than five batches ``num_batches // 5`` is 0 and the
        # modulo below would raise ZeroDivisionError.
        plot_every = max(1, num_batches // 5)
        for epoch in range(num_epochs):
            # Sum of training loss, sum of training accuracy, no. of examples
            metric = d2l.Accumulator(3)
            net.set_train()
            for i, (X, y) in enumerate(train_dataset.create_tuple_iterator()):
                timer.start()
                loss, y_hat = train(X, y, optim)
                metric.add(loss.asnumpy() * X.shape[0],
                           d2l.accuracy(y_hat, y), X.shape[0])
                timer.stop()
                train_l = metric[0] / metric[2]
                train_acc = metric[1] / metric[2]
                if (i + 1) % plot_every == 0 or i == num_batches - 1:
                    animator.add(epoch + (i + 1) / num_batches,
                                 (train_l, train_acc, None))
            test_acc = evaluate_accuracy_gpu(net, test_dataset)
            animator.add(epoch + 1, (None, None, test_acc))
        print(f'loss {train_l:.3f}, train acc {train_acc:.3f}, '
              f'test acc {test_acc:.3f}')
        print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec')

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    #@save
    def train_ch6(net, train_iter, test_iter, num_epochs, lr, device):
        """Train a model with a GPU (defined in Chapter 6).

        Re-initializes the weights of ``nn.Linear`` and ``nn.Conv2d`` layers
        with Xavier-uniform values, moves the model to ``device``, and trains
        it with SGD and cross-entropy loss. The running training loss and
        accuracy are plotted several times per epoch and the test accuracy
        once per epoch; the final metrics and throughput are printed.

        Args:
            net: ``nn.Module`` to train.
            train_iter: Iterable of ``(X, y)`` training batches; must
                support ``len()``.
            test_iter: Iterable passed to ``evaluate_accuracy_gpu``.
            num_epochs: Number of passes over ``train_iter``.
            lr: Learning rate for SGD.
            device: Device the model and every minibatch are moved to.
        """
        def init_weights(m):
            # Exact type match (not isinstance), so subclasses of these
            # layers are left untouched, as before.
            if type(m) in (nn.Linear, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)
        net.apply(init_weights)
        print('training on', device)
        net.to(device)
        optimizer = torch.optim.SGD(net.parameters(), lr=lr)
        loss = nn.CrossEntropyLoss()
        animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs],
                                legend=['train loss', 'train acc', 'test acc'])
        timer, num_batches = d2l.Timer(), len(train_iter)
        # Plot roughly five times per epoch. Clamp the interval to at least 1:
        # with fewer than five batches ``num_batches // 5`` is 0 and the
        # modulo below would raise ZeroDivisionError.
        plot_every = max(1, num_batches // 5)
        for epoch in range(num_epochs):
            # Sum of training loss, sum of training accuracy, no. of examples
            metric = d2l.Accumulator(3)
            net.train()
            for i, (X, y) in enumerate(train_iter):
                timer.start()
                optimizer.zero_grad()
                # Move the minibatch to the target device before the
                # forward and backward passes
                X, y = X.to(device), y.to(device)
                y_hat = net(X)
                l = loss(y_hat, y)
                l.backward()
                optimizer.step()
                with torch.no_grad():
                    metric.add(l * X.shape[0], d2l.accuracy(y_hat, y),
                               X.shape[0])
                timer.stop()
                train_l = metric[0] / metric[2]
                train_acc = metric[1] / metric[2]
                if (i + 1) % plot_every == 0 or i == num_batches - 1:
                    animator.add(epoch + (i + 1) / num_batches,
                                 (train_l, train_acc, None))
            test_acc = evaluate_accuracy_gpu(net, test_iter)
            animator.add(epoch + 1, (None, None, test_acc))
        print(f'loss {train_l:.3f}, train acc {train_acc:.3f}, '
              f'test acc {test_acc:.3f}')
        print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec '
              f'on {str(device)}')

.. raw:: html

.. raw:: html

mindspore pytorch

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    # Hyperparameters: learning rate and number of training epochs
    lr = 0.9
    num_epochs = 10
    train_ch6(net, train_iter, test_iter, num_epochs, lr)

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
    :class: output

    loss 0.459, train acc 0.828, test acc 0.826
    62807.4 examples/sec

.. figure:: output_lenet_4a2e9e_42_1.svg

.. raw:: html

.. raw:: html

.. raw:: latex

   \diilbookstyleinputcell

.. code:: python

    # Hyperparameters: learning rate and number of training epochs
    lr = 0.9
    num_epochs = 10
    # Train on the device selected by d2l.try_gpu()
    train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())

.. raw:: latex

   \diilbookstyleoutputcell

.. parsed-literal::
    :class: output

    loss 0.460, train acc 0.826, test acc 0.743
    242769.9 examples/sec on cuda:0

.. figure:: output_lenet_4a2e9e_45_1.svg

.. raw:: html

.. raw:: html