import numpy as np
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import FashionMNIST
def load_data():
    """Return (train_iter, test_iter) DataLoaders for FashionMNIST.

    Expects the dataset to already be present under ``../data``
    (``download=False``); batches of 256, training set shuffled.
    """
    to_tensor = transforms.ToTensor()
    train_set = FashionMNIST(root='../data', train=True,
                             download=False, transform=to_tensor)
    test_set = FashionMNIST(root='../data', train=False,
                            download=False, transform=to_tensor)
    train_iter = DataLoader(train_set, batch_size=256,
                            shuffle=True, num_workers=0)
    test_iter = DataLoader(test_set, batch_size=256,
                           shuffle=False, num_workers=0)
    return train_iter, test_iter
# Model dimensions: 28*28 flattened pixels in, 10 classes out, one hidden layer.
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Weights drawn from N(0, 0.01^2) natively in torch. The original went
# through numpy (np.random.normal -> torch.tensor), which allocates a
# float64 array and then converts it; torch.randn * 0.01 produces the
# same distribution directly as float32.
W1 = torch.randn(num_inputs, num_hiddens) * 0.01
b1 = torch.zeros(num_hiddens, dtype=torch.float)
W2 = torch.randn(num_hiddens, num_outputs) * 0.01
b2 = torch.zeros(num_outputs, dtype=torch.float)
# Collect every learnable tensor and switch on gradient tracking for each.
params = [W1, b1, W2, b2]
for p in params:
    p.requires_grad_(True)
def relu(X):
    """Elementwise rectified linear unit: max(x, 0).

    Uses clamp instead of torch.max against a freshly allocated
    torch.zeros(X.shape): the zeros tensor was created on the CPU with
    the default dtype, so the original failed for inputs on another
    device and paid an extra allocation on every call.
    """
    return torch.clamp(X, min=0)
def softmax(X):
    """Row-wise softmax over the last classification axis (dim=1).

    Subtracts each row's max before exponentiating (a no-op
    mathematically) so large logits do not overflow exp() to inf/nan,
    which the original unshifted version did.
    """
    X_shifted = X - X.max(dim=1, keepdim=True).values
    X_exp = torch.exp(X_shifted)
    partition = X_exp.sum(dim=1, keepdim=True)
    return X_exp / partition
def net(X):
    """Forward pass of the one-hidden-layer MLP.

    Flatten to (batch, num_inputs), affine -> ReLU -> affine, then
    row-wise softmax to class probabilities.
    """
    flat = X.reshape((-1, num_inputs))
    hidden = relu(flat @ W1 + b1)
    logits = hidden @ W2 + b2
    return softmax(logits)
def cross_entropy(y_predict, y):
    """Per-sample negative log-likelihood of the true class.

    y_predict: (batch, classes) probabilities; y: (batch,) int class
    indices. Returns a (batch,) tensor of -log p[true_class].
    """
    picked = y_predict.gather(1, y.view(-1, 1)).squeeze(1)
    return -torch.log(picked)
def sgd(params, lr, batch_size):
    """Minibatch SGD step: param -= lr * grad / batch_size, in place.

    Runs under torch.no_grad() rather than poking .data — the supported
    way to update leaf tensors without recording the update in autograd.
    Parameters with no gradient (unused in the loss) are skipped instead
    of raising on None.
    """
    with torch.no_grad():
        for param in params:
            if param.grad is not None:
                param -= lr * param.grad / batch_size
def evaluate_accuracy(net, data_iter):
    """Fraction of samples in data_iter that net classifies correctly.

    net: callable mapping a batch X to (batch, classes) scores.
    data_iter: iterable of (X, y) batches with integer labels y.

    Wrapped in torch.no_grad() so evaluation forward passes do not build
    autograd graphs (the params require grad, so the original tracked
    every batch). Returns 0.0 for an empty iterator instead of dividing
    by zero.
    """
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n if n else 0.0
def train(net, train_iter, test_iter, loss, num_epochs, batch_size=256,
          lr=0.1, params=None, optimizer=None):
    """Train net on train_iter, printing loss/accuracy each epoch.

    loss: callable (y_predict, y) -> per-sample losses. The original
        accepted this parameter but ignored it and hard-coded
        cross_entropy; it is now actually used.
    optimizer: optional torch optimizer. If given, its step()/zero_grad()
        drive the update (the original accepted but ignored it);
        otherwise plain sgd(params, lr, batch_size) is used and the
        gradients in `params` are zeroed — the original reached for the
        global W1/b1/W2/b2, which broke the function for any other
        parameter set.
    """
    for epoch in range(num_epochs):
        train_loss_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_predict = net(X)
            l = loss(y_predict, y).sum()
            l.backward()
            if optimizer is not None:
                optimizer.step()
                optimizer.zero_grad()
            else:
                sgd(params, lr, batch_size)
                # Clear gradients on the supplied params, not on globals.
                for param in params:
                    if param.grad is not None:
                        param.grad.zero_()
            train_loss_sum += l.item()
            train_acc_sum += (y_predict.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(net, test_iter)
        print('epoch %d, loss %.4f, train_acc %.4f, test_acc %.4f'
              % (epoch + 1, train_loss_sum / n, train_acc_sum / n, test_acc))
def _main():
    """Script entry point: load FashionMNIST and train the MLP for 10 epochs."""
    train_data, test_data = load_data()
    train(net, train_data, test_data, cross_entropy, 10, lr=0.1, params=params)


if __name__ == "__main__":
    _main()