fig, ax = plt.subplots(nrows=10, ncols=10, figsize=(10, 10))
for i in range(10):  # Column by column
    num_i = X_train[y_train == i]
    ax[0][i].set_title(i)
    for j in range(10):  # Row by row
        ax[j][i].axis('off')
        ax[j][i].imshow(num_i.iloc[j, :].to_numpy().astype(np.uint8).reshape(28, 28), cmap='gray')
class CharData(Dataset):  # custom Dataset wrapping the pixel dataframe
    def __init__(self,
                 images,
                 labels=None,
                 transform=None,
                 ):
        self.X = images
        self.y = labels
        self.transform = transform

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        img = np.array(self.X.iloc[idx, :], dtype='uint8').reshape([28, 28, 1])
        if self.transform is not None:
            img = self.transform(img)
        if self.y is not None:
            y = np.zeros(10, dtype='float32')  # one-hot label, needed later for mixup targets
            y[self.y.iloc[idx]] = 1
            return img, y
        return img
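The loaders used below (`train_loader`, `dev_loader`, `test_loader`) are built from this class. A minimal sketch of how they were likely constructed, assuming `X_train`/`y_train` are the pixel dataframe and label series from above and `X_test` is the test dataframe; the 420-sample dev split and batch size 256 are inferred from the `41580`/`420` dataset sizes and the `25600/41580` progress lines in the training log, but the exact settings are an assumption:

```python
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from torchvision import transforms

# 420 dev samples leaves 41580 for training, matching the log output below
X_tr, X_dev, y_tr, y_dev = train_test_split(X_train, y_train, test_size=420, stratify=y_train)

tfm = transforms.ToTensor()  # HWC uint8 -> CHW float in [0, 1]
train_loader = DataLoader(CharData(X_tr, y_tr, tfm), batch_size=256, shuffle=True)
dev_loader = DataLoader(CharData(X_dev, y_dev, tfm), batch_size=256, shuffle=False)
test_loader = DataLoader(CharData(X_test, transform=tfm), batch_size=256, shuffle=False)
```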
# Check a batch of samples
fig, ax = plt.subplots(nrows=1, ncols=16, figsize=(30, 4))
for batch in train_loader:
    for i in range(16):
        ax[i].set_title(batch[1][i].data.numpy().argmax())  # decode the one-hot label
        ax[i].imshow(batch[0][i, 0], cmap='gray')
    break  # only the first batch
DEPTH_MULT = 2

class ConvLayer(nn.Module):
    def __init__(self, input_size, output_size, kernel_size=3):
        super(ConvLayer, self).__init__()
        self.ops = nn.Sequential(
            nn.Conv2d(input_size, output_size, kernel_size=kernel_size, stride=1, padding=kernel_size//2),
            nn.BatchNorm2d(output_size),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.ops(x)
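With stride 1 and `padding=kernel_size//2`, each `ConvLayer` preserves the spatial dimensions (for odd kernel sizes), so only the two pooling layers change resolution. A quick shape sanity check, as a sketch on random data:

```python
import torch

layer = ConvLayer(1, 32).eval()    # eval mode so BatchNorm uses running stats
x = torch.randn(8, 1, 28, 28)      # batch of 8 fake 28x28 images
print(layer(x).shape)              # torch.Size([8, 32, 28, 28]) -- H and W unchanged
```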
class FCLayer(nn.Module):
    def __init__(self, input_size, output_size):
        super(FCLayer, self).__init__()
        self.ops = nn.Sequential(
            nn.Linear(input_size, output_size),
            nn.BatchNorm1d(output_size),
            nn.ReLU(inplace=True)
        )
        self.residual = input_size == output_size  # skip connection only when shapes match

    def forward(self, x):
        if self.residual:
            return (self.ops(x) + x) / np.sqrt(2)  # rescale so the sum keeps roughly unit variance
        return self.ops(x)
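The 1/√2 factor keeps the activation scale stable: if the branch output and the skip input were independent with unit variance, their sum would have variance 2, and dividing by √2 restores it to 1. A minimal numerical illustration (in the real network the two terms are correlated, so this is only the motivating approximation):

```python
import torch

a = torch.randn(100_000)
b = torch.randn(100_000)
print((a + b).var())             # ~2.0: variances of independent terms add
print(((a + b) / 2**0.5).var())  # ~1.0: the 1/sqrt(2) rescale restores unit variance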
def mixup(x, shuffle, lam, i, j):
    # Apply the convex combination only at the randomly chosen layer j
    if shuffle is not None and lam is not None and i == j:
        x = lam * x + (1 - lam) * x[shuffle]
    return x
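This resembles manifold mixup: instead of always mixing raw inputs, a single layer index `j` is drawn per batch and the hidden states at that layer are blended with a permuted copy of the batch, using the same λ that mixes the targets. A toy call on a batch of 4 feature vectors:

```python
import torch

x = torch.arange(8.0).reshape(4, 2)      # 4 samples, 2 features
shuffle = torch.tensor([1, 0, 3, 2])     # pairing within the batch
lam = 0.7

print(mixup(x, shuffle, lam, i=0, j=0))  # i == j: rows blended 70/30 with their partner
print(mixup(x, shuffle, lam, i=1, j=0))  # i != j: returned unchanged
```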
class Net(nn.Module):
    def __init__(self, num_classes):
        super(Net, self).__init__()
        self.conv1 = ConvLayer(1, DEPTH_MULT * 32)
        self.conv2 = ConvLayer(DEPTH_MULT * 32, DEPTH_MULT * 32)
        self.conv3 = ConvLayer(DEPTH_MULT * 32, DEPTH_MULT * 32)
        self.conv4 = ConvLayer(DEPTH_MULT * 32, DEPTH_MULT * 32)
        self.conv5 = ConvLayer(DEPTH_MULT * 32, DEPTH_MULT * 64)
        self.conv6 = ConvLayer(DEPTH_MULT * 64, DEPTH_MULT * 64)
        self.conv7 = ConvLayer(DEPTH_MULT * 64, DEPTH_MULT * 64)
        self.conv8 = ConvLayer(DEPTH_MULT * 64, DEPTH_MULT * 64)
        self.conv9 = ConvLayer(DEPTH_MULT * 64, DEPTH_MULT * 64)
        self.conv10 = ConvLayer(DEPTH_MULT * 64, DEPTH_MULT * 64)
        self.mp = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = FCLayer(DEPTH_MULT * 64 * 7 * 7, DEPTH_MULT * 512)
        self.fc2 = FCLayer(DEPTH_MULT * 512, DEPTH_MULT * 512)
        self.fc3 = FCLayer(DEPTH_MULT * 512, DEPTH_MULT * 512)
        self.fc4 = FCLayer(DEPTH_MULT * 512, DEPTH_MULT * 512)
        self.projection = nn.Linear(DEPTH_MULT * 512, num_classes)  # was hard-coded to 10

    def forward(self, x):
        if isinstance(x, list):
            x, shuffle, lam = x
        else:
            shuffle = None
            lam = None
        # Decide which layer to mix up (input plus 14 intermediate activations = 15 candidates)
        j = np.random.randint(15)
        x = mixup(x, shuffle, lam, 0, j)
        x = self.conv1(x)
        x = mixup(x, shuffle, lam, 1, j)
        x = self.conv2(x)
        x = mixup(x, shuffle, lam, 2, j)
        x = self.conv3(x)
        x = mixup(x, shuffle, lam, 3, j)
        x = self.conv4(x)
        x = self.mp(x)
        x = mixup(x, shuffle, lam, 4, j)
        x = self.conv5(x)
        x = mixup(x, shuffle, lam, 5, j)
        x = self.conv6(x)
        x = mixup(x, shuffle, lam, 6, j)
        x = self.conv7(x)
        x = mixup(x, shuffle, lam, 7, j)
        x = self.conv8(x)
        x = mixup(x, shuffle, lam, 8, j)
        x = self.conv9(x)
        x = mixup(x, shuffle, lam, 9, j)
        x = self.conv10(x)
        x = self.mp(x)
        x = x.view(x.size(0), -1)
        x = mixup(x, shuffle, lam, 10, j)
        x = self.fc1(x)
        x = mixup(x, shuffle, lam, 11, j)
        x = self.fc2(x)
        x = mixup(x, shuffle, lam, 12, j)
        x = self.fc3(x)
        x = mixup(x, shuffle, lam, 13, j)
        x = self.fc4(x)
        x = mixup(x, shuffle, lam, 14, j)
        x = self.projection(x)
        return x
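The `fc1` input size follows from the two pooling steps: 28×28 → 14×14 after the first `MaxPool2d`, then → 7×7 after the second, with `DEPTH_MULT * 64` channels at that point. A quick forward-pass sanity check, as a sketch:

```python
import torch

net = Net(10).eval()                       # eval mode: BatchNorm uses running stats
with torch.no_grad():
    out = net(torch.randn(2, 1, 28, 28))   # plain tensor input -> mixup is skipped
print(out.shape)                           # torch.Size([2, 10])
print(DEPTH_MULT * 64 * 7 * 7)             # 6272, the flattened size feeding fc1
```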
def criterion(input, target, size_average=True):
    # Soft-target cross-entropy: mixup produces non-one-hot targets,
    # which the index-based F.cross_entropy cannot consume directly.
    l = -(target * torch.log(F.softmax(input, dim=1) + 1e-10)).sum(1)
    if size_average:
        l = l.mean()
    else:
        l = l.sum()
    return l
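`F.log_softmax` computes the same log-probabilities with the log-sum-exp trick, so the `1e-10` guard becomes unnecessary. A drop-in sketch of the numerically stabler equivalent:

```python
def criterion_stable(input, target, size_average=True):
    # log_softmax avoids explicit log(softmax(...) + eps)
    l = -(target * F.log_softmax(input, dim=1)).sum(1)
    return l.mean() if size_average else l.sum()
```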
model = Net(10)
model = model.to(device)
n_epochs = 100
optimizer = optim.Adam(model.parameters(), lr=0.0001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=n_epochs // 4, gamma=0.1)
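With `step_size = n_epochs // 4 = 25` and `gamma = 0.1`, the learning rate drops by 10× at epochs 25, 50, and 75, which is exactly what the `LR:` column in the training log below shows. A quick way to preview the schedule, as a sketch of the StepLR formula:

```python
# StepLR: lr = base_lr * gamma ** (epoch // step_size)
for epoch in [0, 24, 25, 49, 50, 74, 75]:
    print(epoch, 0.0001 * 0.1 ** (epoch // 25))
```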
def train(epoch, history=None):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.to(device)
        target = target.to(device)
        # mixup: draw one lambda and one pairing per batch, mix the soft targets here,
        # and pass the same (shuffle, lam) so the model mixes activations at a random layer
        alpha = 2
        lam = np.random.beta(alpha, alpha)
        shuffle = torch.randperm(data.shape[0])
        target = lam * target + (1 - lam) * target[shuffle]
        optimizer.zero_grad()
        output = model([data, shuffle, lam])
        loss = criterion(output, target)
        if history is not None:
            history.loc[epoch + batch_idx / len(train_loader), 'train_loss'] = loss.data.cpu().numpy()
        loss.backward()
        optimizer.step()
        if (batch_idx + 1) % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLR: {:.6f}\tLoss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader),
                optimizer.state_dict()['param_groups'][0]['lr'],
                loss.data))
    exp_lr_scheduler.step()
def evaluate(epoch, history=None):
    model.eval()
    loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in dev_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            loss += criterion(output, target, size_average=False).data
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.max(1, keepdim=True)[1].data.view_as(pred)).cpu().sum().numpy()
    loss /= len(dev_loader.dataset)
    accuracy = correct / len(dev_loader.dataset)
    if history is not None:
        history.loc[epoch, 'dev_loss'] = loss.cpu().numpy()
        history.loc[epoch, 'dev_accuracy'] = accuracy
    print('Dev loss: {:.4f}, Dev accuracy: {}/{} ({:.3f}%)\n'.format(
        loss, correct, len(dev_loader.dataset),
        100. * accuracy))
%%time
import gc

history = pd.DataFrame()
for epoch in range(n_epochs):
    torch.cuda.empty_cache()
    gc.collect()
    train(epoch, history)
    evaluate(epoch, history)
# Dev accuracy is already high, so early stopping would be a reasonable addition
# (a sketch follows the training log below)
Train Epoch: 0 [25600/41580 (61%)] LR: 0.000100 Loss: 1.021838
Dev loss: 0.1099, Dev accuracy: 416/420 (99.048%)
Train Epoch: 1 [25600/41580 (61%)] LR: 0.000100 Loss: 0.813835
Dev loss: 0.1344, Dev accuracy: 419/420 (99.762%)
Train Epoch: 2 [25600/41580 (61%)] LR: 0.000100 Loss: 0.691444
Dev loss: 0.1147, Dev accuracy: 416/420 (99.048%)
Train Epoch: 3 [25600/41580 (61%)] LR: 0.000100 Loss: 0.611507
Dev loss: 0.1211, Dev accuracy: 417/420 (99.286%)
Train Epoch: 4 [25600/41580 (61%)] LR: 0.000100 Loss: 0.721414
Dev loss: 0.0712, Dev accuracy: 420/420 (100.000%)
Train Epoch: 5 [25600/41580 (61%)] LR: 0.000100 Loss: 0.384039
Dev loss: 0.0729, Dev accuracy: 418/420 (99.524%)
Train Epoch: 6 [25600/41580 (61%)] LR: 0.000100 Loss: 0.900925
Dev loss: 0.0719, Dev accuracy: 419/420 (99.762%)
Train Epoch: 7 [25600/41580 (61%)] LR: 0.000100 Loss: 0.639206
Dev loss: 0.0600, Dev accuracy: 419/420 (99.762%)
Train Epoch: 8 [25600/41580 (61%)] LR: 0.000100 Loss: 0.655744
Dev loss: 0.0787, Dev accuracy: 418/420 (99.524%)
Train Epoch: 9 [25600/41580 (61%)] LR: 0.000100 Loss: 0.638043
Dev loss: 0.0792, Dev accuracy: 419/420 (99.762%)
Train Epoch: 10 [25600/41580 (61%)] LR: 0.000100 Loss: 0.579519
Dev loss: 0.0646, Dev accuracy: 418/420 (99.524%)
Train Epoch: 11 [25600/41580 (61%)] LR: 0.000100 Loss: 0.468557
Dev loss: 0.0682, Dev accuracy: 419/420 (99.762%)
Train Epoch: 12 [25600/41580 (61%)] LR: 0.000100 Loss: 0.643769
Dev loss: 0.0493, Dev accuracy: 418/420 (99.524%)
Train Epoch: 13 [25600/41580 (61%)] LR: 0.000100 Loss: 0.574202
Dev loss: 0.0539, Dev accuracy: 418/420 (99.524%)
Train Epoch: 14 [25600/41580 (61%)] LR: 0.000100 Loss: 0.658280
Dev loss: 0.0659, Dev accuracy: 417/420 (99.286%)
Train Epoch: 15 [25600/41580 (61%)] LR: 0.000100 Loss: 0.478199
Dev loss: 0.0813, Dev accuracy: 418/420 (99.524%)
Train Epoch: 16 [25600/41580 (61%)] LR: 0.000100 Loss: 0.720078
Dev loss: 0.0711, Dev accuracy: 418/420 (99.524%)
Train Epoch: 17 [25600/41580 (61%)] LR: 0.000100 Loss: 0.950047
Dev loss: 0.0689, Dev accuracy: 418/420 (99.524%)
Train Epoch: 18 [25600/41580 (61%)] LR: 0.000100 Loss: 0.543526
Dev loss: 0.0506, Dev accuracy: 417/420 (99.286%)
Train Epoch: 19 [25600/41580 (61%)] LR: 0.000100 Loss: 0.546754
Dev loss: 0.0566, Dev accuracy: 417/420 (99.286%)
Train Epoch: 20 [25600/41580 (61%)] LR: 0.000100 Loss: 0.583502
Dev loss: 0.0457, Dev accuracy: 419/420 (99.762%)
Train Epoch: 21 [25600/41580 (61%)] LR: 0.000100 Loss: 0.590696
Dev loss: 0.0403, Dev accuracy: 419/420 (99.762%)
Train Epoch: 22 [25600/41580 (61%)] LR: 0.000100 Loss: 0.499778
Dev loss: 0.0492, Dev accuracy: 418/420 (99.524%)
Train Epoch: 23 [25600/41580 (61%)] LR: 0.000100 Loss: 0.625026
Dev loss: 0.0988, Dev accuracy: 418/420 (99.524%)
Train Epoch: 24 [25600/41580 (61%)] LR: 0.000100 Loss: 0.569994
Dev loss: 0.0552, Dev accuracy: 419/420 (99.762%)
Train Epoch: 25 [25600/41580 (61%)] LR: 0.000010 Loss: 0.640723
Dev loss: 0.0311, Dev accuracy: 418/420 (99.524%)
Train Epoch: 26 [25600/41580 (61%)] LR: 0.000010 Loss: 0.425440
Dev loss: 0.0438, Dev accuracy: 418/420 (99.524%)
Train Epoch: 27 [25600/41580 (61%)] LR: 0.000010 Loss: 0.634802
Dev loss: 0.0390, Dev accuracy: 418/420 (99.524%)
Train Epoch: 28 [25600/41580 (61%)] LR: 0.000010 Loss: 0.496772
Dev loss: 0.0427, Dev accuracy: 419/420 (99.762%)
Train Epoch: 29 [25600/41580 (61%)] LR: 0.000010 Loss: 0.673567
Dev loss: 0.0374, Dev accuracy: 418/420 (99.524%)
Train Epoch: 30 [25600/41580 (61%)] LR: 0.000010 Loss: 0.548118
Dev loss: 0.0439, Dev accuracy: 419/420 (99.762%)
Train Epoch: 31 [25600/41580 (61%)] LR: 0.000010 Loss: 0.647226
Dev loss: 0.0381, Dev accuracy: 419/420 (99.762%)
Train Epoch: 32 [25600/41580 (61%)] LR: 0.000010 Loss: 0.620449
Dev loss: 0.0368, Dev accuracy: 419/420 (99.762%)
Train Epoch: 33 [25600/41580 (61%)] LR: 0.000010 Loss: 0.652556
Dev loss: 0.0429, Dev accuracy: 419/420 (99.762%)
Train Epoch: 34 [25600/41580 (61%)] LR: 0.000010 Loss: 0.648158
Dev loss: 0.0356, Dev accuracy: 419/420 (99.762%)
Train Epoch: 35 [25600/41580 (61%)] LR: 0.000010 Loss: 0.588523
Dev loss: 0.0337, Dev accuracy: 418/420 (99.524%)
Train Epoch: 36 [25600/41580 (61%)] LR: 0.000010 Loss: 0.653528
Dev loss: 0.0388, Dev accuracy: 419/420 (99.762%)
Train Epoch: 37 [25600/41580 (61%)] LR: 0.000010 Loss: 0.621687
Dev loss: 0.0387, Dev accuracy: 419/420 (99.762%)
Train Epoch: 38 [25600/41580 (61%)] LR: 0.000010 Loss: 0.781208
Dev loss: 0.0348, Dev accuracy: 419/420 (99.762%)
Train Epoch: 39 [25600/41580 (61%)] LR: 0.000010 Loss: 0.571978
Dev loss: 0.0363, Dev accuracy: 419/420 (99.762%)
Train Epoch: 40 [25600/41580 (61%)] LR: 0.000010 Loss: 0.658363
Dev loss: 0.0382, Dev accuracy: 419/420 (99.762%)
Train Epoch: 41 [25600/41580 (61%)] LR: 0.000010 Loss: 0.395222
Dev loss: 0.0337, Dev accuracy: 419/420 (99.762%)
Train Epoch: 42 [25600/41580 (61%)] LR: 0.000010 Loss: 0.383706
Dev loss: 0.0479, Dev accuracy: 418/420 (99.524%)
Train Epoch: 43 [25600/41580 (61%)] LR: 0.000010 Loss: 0.622156
Dev loss: 0.0473, Dev accuracy: 418/420 (99.524%)
Train Epoch: 44 [25600/41580 (61%)] LR: 0.000010 Loss: 0.667140
Dev loss: 0.0398, Dev accuracy: 419/420 (99.762%)
Train Epoch: 45 [25600/41580 (61%)] LR: 0.000010 Loss: 0.561900
Dev loss: 0.0371, Dev accuracy: 419/420 (99.762%)
Train Epoch: 46 [25600/41580 (61%)] LR: 0.000010 Loss: 0.243738
Dev loss: 0.0436, Dev accuracy: 418/420 (99.524%)
Train Epoch: 47 [25600/41580 (61%)] LR: 0.000010 Loss: 0.461719
Dev loss: 0.0400, Dev accuracy: 419/420 (99.762%)
Train Epoch: 48 [25600/41580 (61%)] LR: 0.000010 Loss: 0.600437
Dev loss: 0.0349, Dev accuracy: 419/420 (99.762%)
Train Epoch: 49 [25600/41580 (61%)] LR: 0.000010 Loss: 0.571216
Dev loss: 0.0428, Dev accuracy: 419/420 (99.762%)
Train Epoch: 50 [25600/41580 (61%)] LR: 0.000001 Loss: 0.655558
Dev loss: 0.0350, Dev accuracy: 419/420 (99.762%)
Train Epoch: 51 [25600/41580 (61%)] LR: 0.000001 Loss: 0.619675
Dev loss: 0.0471, Dev accuracy: 419/420 (99.762%)
Train Epoch: 52 [25600/41580 (61%)] LR: 0.000001 Loss: 0.427025
Dev loss: 0.0441, Dev accuracy: 419/420 (99.762%)
Train Epoch: 53 [25600/41580 (61%)] LR: 0.000001 Loss: 0.625682
Dev loss: 0.0383, Dev accuracy: 419/420 (99.762%)
Train Epoch: 54 [25600/41580 (61%)] LR: 0.000001 Loss: 0.589247
Dev loss: 0.0379, Dev accuracy: 419/420 (99.762%)
Train Epoch: 55 [25600/41580 (61%)] LR: 0.000001 Loss: 0.479040
Dev loss: 0.0333, Dev accuracy: 419/420 (99.762%)
Train Epoch: 56 [25600/41580 (61%)] LR: 0.000001 Loss: 0.609528
Dev loss: 0.0388, Dev accuracy: 419/420 (99.762%)
Train Epoch: 57 [25600/41580 (61%)] LR: 0.000001 Loss: 0.186089
Dev loss: 0.0350, Dev accuracy: 419/420 (99.762%)
Train Epoch: 58 [25600/41580 (61%)] LR: 0.000001 Loss: 0.658882
Dev loss: 0.0368, Dev accuracy: 419/420 (99.762%)
Train Epoch: 59 [25600/41580 (61%)] LR: 0.000001 Loss: 0.613059
Dev loss: 0.0343, Dev accuracy: 419/420 (99.762%)
Train Epoch: 60 [25600/41580 (61%)] LR: 0.000001 Loss: 0.541516
Dev loss: 0.0390, Dev accuracy: 419/420 (99.762%)
Train Epoch: 61 [25600/41580 (61%)] LR: 0.000001 Loss: 0.619850
Dev loss: 0.0344, Dev accuracy: 419/420 (99.762%)
Train Epoch: 62 [25600/41580 (61%)] LR: 0.000001 Loss: 0.594599
Dev loss: 0.0352, Dev accuracy: 419/420 (99.762%)
Train Epoch: 63 [25600/41580 (61%)] LR: 0.000001 Loss: 0.623573
Dev loss: 0.0344, Dev accuracy: 419/420 (99.762%)
Train Epoch: 64 [25600/41580 (61%)] LR: 0.000001 Loss: 0.320872
Dev loss: 0.0379, Dev accuracy: 419/420 (99.762%)
Train Epoch: 65 [25600/41580 (61%)] LR: 0.000001 Loss: 0.634984
Dev loss: 0.0334, Dev accuracy: 419/420 (99.762%)
Train Epoch: 66 [25600/41580 (61%)] LR: 0.000001 Loss: 0.458917
Dev loss: 0.0349, Dev accuracy: 419/420 (99.762%)
Train Epoch: 67 [25600/41580 (61%)] LR: 0.000001 Loss: 0.608051
Dev loss: 0.0354, Dev accuracy: 419/420 (99.762%)
Train Epoch: 68 [25600/41580 (61%)] LR: 0.000001 Loss: 0.739585
Dev loss: 0.0398, Dev accuracy: 419/420 (99.762%)
Train Epoch: 69 [25600/41580 (61%)] LR: 0.000001 Loss: 0.393449
Dev loss: 0.0321, Dev accuracy: 419/420 (99.762%)
Train Epoch: 70 [25600/41580 (61%)] LR: 0.000001 Loss: 0.665298
Dev loss: 0.0432, Dev accuracy: 419/420 (99.762%)
Train Epoch: 71 [25600/41580 (61%)] LR: 0.000001 Loss: 0.424112
Dev loss: 0.0356, Dev accuracy: 419/420 (99.762%)
Train Epoch: 72 [25600/41580 (61%)] LR: 0.000001 Loss: 0.483833
Dev loss: 0.0359, Dev accuracy: 419/420 (99.762%)
Train Epoch: 73 [25600/41580 (61%)] LR: 0.000001 Loss: 0.642284
Dev loss: 0.0409, Dev accuracy: 419/420 (99.762%)
Train Epoch: 74 [25600/41580 (61%)] LR: 0.000001 Loss: 0.524682
Dev loss: 0.0330, Dev accuracy: 419/420 (99.762%)
Train Epoch: 75 [25600/41580 (61%)] LR: 0.000000 Loss: 0.642408
Dev loss: 0.0365, Dev accuracy: 418/420 (99.524%)
Train Epoch: 76 [25600/41580 (61%)] LR: 0.000000 Loss: 0.817751
Dev loss: 0.0361, Dev accuracy: 419/420 (99.762%)
Train Epoch: 77 [25600/41580 (61%)] LR: 0.000000 Loss: 0.571057
Dev loss: 0.0339, Dev accuracy: 419/420 (99.762%)
Train Epoch: 78 [25600/41580 (61%)] LR: 0.000000 Loss: 0.496010
Dev loss: 0.0369, Dev accuracy: 419/420 (99.762%)
Train Epoch: 79 [25600/41580 (61%)] LR: 0.000000 Loss: 0.637652
Dev loss: 0.0370, Dev accuracy: 419/420 (99.762%)
Train Epoch: 80 [25600/41580 (61%)] LR: 0.000000 Loss: 0.371472
Dev loss: 0.0375, Dev accuracy: 419/420 (99.762%)
Train Epoch: 81 [25600/41580 (61%)] LR: 0.000000 Loss: 0.632419
Dev loss: 0.0338, Dev accuracy: 419/420 (99.762%)
Train Epoch: 82 [25600/41580 (61%)] LR: 0.000000 Loss: 0.665854
Dev loss: 0.0345, Dev accuracy: 419/420 (99.762%)
Train Epoch: 83 [25600/41580 (61%)] LR: 0.000000 Loss: 0.588508
Dev loss: 0.0370, Dev accuracy: 419/420 (99.762%)
Train Epoch: 84 [25600/41580 (61%)] LR: 0.000000 Loss: 0.581868
Dev loss: 0.0375, Dev accuracy: 419/420 (99.762%)
Train Epoch: 85 [25600/41580 (61%)] LR: 0.000000 Loss: 0.635964
Dev loss: 0.0296, Dev accuracy: 419/420 (99.762%)
Train Epoch: 86 [25600/41580 (61%)] LR: 0.000000 Loss: 0.699944
Dev loss: 0.0329, Dev accuracy: 419/420 (99.762%)
Train Epoch: 87 [25600/41580 (61%)] LR: 0.000000 Loss: 0.866631
Dev loss: 0.0363, Dev accuracy: 419/420 (99.762%)
Train Epoch: 88 [25600/41580 (61%)] LR: 0.000000 Loss: 0.676538
Dev loss: 0.0330, Dev accuracy: 419/420 (99.762%)
Train Epoch: 89 [25600/41580 (61%)] LR: 0.000000 Loss: 0.483626
Dev loss: 0.0413, Dev accuracy: 419/420 (99.762%)
Train Epoch: 90 [25600/41580 (61%)] LR: 0.000000 Loss: 0.514636
Dev loss: 0.0342, Dev accuracy: 419/420 (99.762%)
Train Epoch: 91 [25600/41580 (61%)] LR: 0.000000 Loss: 0.360000
Dev loss: 0.0327, Dev accuracy: 419/420 (99.762%)
Train Epoch: 92 [25600/41580 (61%)] LR: 0.000000 Loss: 0.606525
Dev loss: 0.0364, Dev accuracy: 419/420 (99.762%)
Train Epoch: 93 [25600/41580 (61%)] LR: 0.000000 Loss: 0.628101
Dev loss: 0.0381, Dev accuracy: 419/420 (99.762%)
Train Epoch: 94 [25600/41580 (61%)] LR: 0.000000 Loss: 0.794253
Dev loss: 0.0306, Dev accuracy: 418/420 (99.524%)
Train Epoch: 95 [25600/41580 (61%)] LR: 0.000000 Loss: 0.640584
Dev loss: 0.0355, Dev accuracy: 419/420 (99.762%)
Train Epoch: 96 [25600/41580 (61%)] LR: 0.000000 Loss: 0.458734
Dev loss: 0.0340, Dev accuracy: 419/420 (99.762%)
Train Epoch: 97 [25600/41580 (61%)] LR: 0.000000 Loss: 0.532387
Dev loss: 0.0396, Dev accuracy: 419/420 (99.762%)
Train Epoch: 98 [25600/41580 (61%)] LR: 0.000000 Loss: 0.562182
Dev loss: 0.0340, Dev accuracy: 419/420 (99.762%)
Train Epoch: 99 [25600/41580 (61%)] LR: 0.000000 Loss: 0.623679
Dev loss: 0.0341, Dev accuracy: 419/420 (99.762%)
CPU times: user 32min 18s, sys: 12min 2s, total: 44min 21s
Wall time: 59min 47s
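As the comment above notes, dev accuracy plateaus within the first few epochs, so early stopping could cut most of the hour-long run. A minimal sketch of a variant of the training loop that stops once dev loss has not improved for `patience` epochs; the patience value and checkpoint filename are illustrative:

```python
best_loss, patience, bad_epochs = float('inf'), 10, 0
for epoch in range(n_epochs):
    train(epoch, history)
    evaluate(epoch, history)
    dev_loss = history.loc[epoch, 'dev_loss']
    if dev_loss < best_loss:
        best_loss, bad_epochs = dev_loss, 0
        torch.save(model.state_dict(), 'best.pt')  # keep the best checkpoint
    else:
        bad_epochs += 1
        if bad_epochs >= patience:
            print(f'Early stop at epoch {epoch}')
            break
```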
history['train_loss'].plot();
history.dropna()['dev_loss'].plot();
history.dropna()['dev_accuracy'].plot();
print('max', history.dropna()['dev_accuracy'].max())
print('max in last 5', history.dropna()['dev_accuracy'].iloc[-5:].max())
print('avg in last 5', history.dropna()['dev_accuracy'].iloc[-5:].mean())
max 1.0
max in last 5 0.9976190476190476
avg in last 5 0.9976190476190476
model.eval()
predictions = []
for data in tqdm(test_loader):
    data = data.to(device)
    output = model(data).max(dim=1)[1]  # argmax over class logits
    predictions += list(output.data.cpu().numpy())
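In a Kaggle-style kernel like this one, the predictions would typically be written to a submission file. A sketch, assuming the Digit Recognizer's `ImageId`/`Label` format; the column names and 1-based ids are assumptions, not confirmed by this notebook:

```python
submission = pd.DataFrame({
    'ImageId': np.arange(1, len(predictions) + 1),  # assumed 1-based ids
    'Label': predictions,
})
submission.to_csv('submission.csv', index=False)
```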