import torch
from torch import nn
from d2l import torch as d2l

# Data: Fashion-MNIST, 28x28 grayscale images, 10 classes.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

num_inputs = 28 * 28
num_outputs = 10

# Softmax regression = flatten the image, then one linear layer.
# No explicit Softmax layer: CrossEntropyLoss applies log-softmax internally.
net = nn.Sequential(nn.Flatten(), nn.Linear(num_inputs, num_outputs))


def init_weights(m):
    """Initialize every linear layer with small Gaussian weights."""
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, std=0.01)


net.apply(init_weights)

# reduction='none' keeps the per-sample losses (shape: (batch_size,)), so the
# training loop can take mean() for the gradient and sum() for the metrics.
loss = torch.nn.CrossEntropyLoss(reduction='none')
trainer = torch.optim.SGD(net.parameters(), lr=0.1)


def accuracy(y_hat, y):
    """Return the number of correct predictions in a batch, as a float.

    `y_hat` may be a (batch, num_classes) matrix of scores or an already
    1-D vector of predicted labels; `y` holds the true labels.
    """
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        # Predicted class = column with the largest score.
        y_hat = y_hat.argmax(axis=1)
    # Cast before comparing; only tensors have .type().
    cmp = y_hat.type(y.dtype) == y
    # Float result so the caller can divide by the total sample count.
    return float(cmp.sum())


def evaluate_accuracy(net, data_iter):
    """Compute the accuracy of `net` over the whole of `data_iter`."""
    if isinstance(net, torch.nn.Module):
        net.eval()  # evaluation mode: no dropout / batch-norm updates
    accumulator = Accumulator(2)  # (no. of correct predictions, no. of samples)
    with torch.no_grad():
        for X, y in data_iter:
            y_hat = net(X)
            accumulator.add(accuracy(y_hat, y), y.numel())
    return accumulator[0] / accumulator[1]


class Accumulator:  #@save
    """Accumulate running sums over `n` variables."""

    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]


def my_train_epoch_ch3(net, train_iter, loss, updater):  #@save
    """Train `net` for one epoch; return the average training loss."""
    if isinstance(net, torch.nn.Module):
        net.train()  # training mode
    metric = Accumulator(2)  # (sum of loss, no. of samples)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)  # per-sample losses, since reduction='none'
        if isinstance(updater, torch.optim.Optimizer):
            updater.zero_grad()
            # Mean keeps the effective learning rate independent of batch size.
            l.mean().backward()
            updater.step()
        else:
            # Custom updater: it divides by the batch size itself.
            l.sum().backward()
            updater(X.shape[0])
        metric.add(l.sum(), y.numel())
    return metric[0] / metric[1]  # average train loss


def my_train_ch3(net, num_epochs, loss, train_iter, test_iter, updater):  #@save
    """Train for `num_epochs`, printing train loss and test accuracy."""
    for epoch in range(num_epochs):
        train_loss = my_train_epoch_ch3(net, train_iter, loss, updater)
        # Accuracy is measured on the *test* iterator, so name it accordingly.
        test_acc = evaluate_accuracy(net, test_iter)
        print(f'epoch {epoch + 1}, loss {train_loss:f}, test acc {test_acc:f}\n')
# Run the training loop: 3 epochs of minibatch SGD on Fashion-MNIST.
num_epochs = 3
my_train_ch3(net, num_epochs, loss, train_iter, test_iter, trainer)
上面的训练代码中存在这样一个问题:
为什么一定是 loss = CrossEntropyLoss(reduction='none') 以及 l.mean().backward()?
为什么不能是loss = torch.nn.CrossEntropyLoss() 以及 l.sum().backward()?
或者 loss = torch.nn.CrossEntropyLoss() 以及 l.mean().backward()?
torch.nn.CrossEntropyLoss() 在不传参数的情况下默认 reduction='mean',返回的是所有样本损失的平均值,是一个标量;而 reduction='none' 时返回的是一个长度为 256(即批量大小)的向量,其中每个元素是对应样本的损失。每个样本的损失是一个标量,它等于模型预测分布在真实类别上的概率的负对数。
事实上,(1)和(2)两种方式也是可以的:默认 reduction='mean' 时 l 已经是标量,此时 l.sum() 与 l.mean() 都等于 l 本身,反向传播得到的梯度与 l.mean().backward() 完全相同,所以实验跑出来的 accuracy 是正常的;区别只在于 metric.add(l.sum(), ...) 统计出的 loss 数值的尺度不同。