
Commit ba279ec

committed
add lab
1 parent 1a0d5a8 commit ba279ec

File tree

5 files changed: +270 -0 lines changed

lab/__Init__.py

Whitespace-only changes.

lab/dataset.py

Lines changed: 88 additions & 0 deletions
@@ -0,0 +1,88 @@
import numpy as np

from mxnet import nd
from mxnet.gluon import data as gdata
from mxnet.gluon.data.vision import transforms as gtf

from datatools import Loader

# Data augmentation
transform_train = gtf.Compose([
    # Randomly crop a region whose area is 0.08 to 1 times that of the
    # original image and whose aspect ratio is between 3/4 and 4/3, then
    # resize it to 224 x 224
    gtf.RandomResizedCrop(
        224, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0)),
    gtf.RandomFlipLeftRight(),
    # Randomly jitter brightness, contrast, and saturation
    gtf.RandomColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    # Randomly add lighting noise
    gtf.RandomLighting(0.1),
    gtf.ToTensor(),
    # Standardize each channel of the image
    gtf.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transform_test = gtf.Compose([
    gtf.Resize(256),
    # Crop the central 224 x 224 square region out of the image
    gtf.CenterCrop(224),
    gtf.ToTensor(),
    gtf.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


class SimpleDataset:
    def __init__(self, name, numLabels, root='E:/xdata/X.h5'):
        import tables as tb
        h5 = tb.open_file(root)
        self.name = name
        self._dataset = h5.root[name]
        self.label_names = self._get_label_names(is_fine_labels=False)
        self._split(numLabels)
        self.testX, self.testY = self._dataset.testX[:], self._dataset.testY[:]
        h5.close()

    def _get_label_names(self, is_fine_labels=False):
        if self.name != 'cifar100':
            return np.asanyarray(self._dataset.label_names, dtype='U')
        elif is_fine_labels:
            return np.asanyarray(self._dataset.fine_label_names, dtype='U')
        else:
            return np.asanyarray(self._dataset.coarse_label_names, dtype='U')

    def _split(self, numLabels):
        from sklearn.model_selection import train_test_split
        xTr = self._dataset.trainX[:]
        yTr = self._dataset.trainY[:]
        test_size = xTr.shape[0] - numLabels
        # Split the training data into a labeled part and a validation part
        self.trainX, self.valX, self.trainY, self.valY = train_test_split(
            xTr, yTr, test_size=test_size, shuffle=True)


class AugLoader(Loader, gdata.Dataset):
    def __init__(self, batch_size, X, Y=None, shuffle=True, *args, **kwargs):
        # Forward the actual Y and shuffle arguments; the original
        # hard-coded Y=None, shuffle=True, discarding what the caller passed
        super().__init__(batch_size, X, Y=Y, shuffle=shuffle, *args, **kwargs)
        self.X = nd.array(X[:])
        if Y is not None:
            self.Y = nd.array(Y[:])

    def aug_imgs(self, imgs):
        '''
        Apply data-augmentation preprocessing to the images.
        The loader needs a `type` attribute ('train' or 'test').
        '''
        transforms_dict = {'train': transform_train, 'test': transform_test}
        return nd.stack(*[transforms_dict[self.type](img) for img in imgs])

    def __iter__(self):
        idx = np.arange(self.nrows)
        if self.type == 'train':
            np.random.shuffle(idx)
        for start in range(0, self.nrows, self.batch_size):
            end = min(start + self.batch_size, self.nrows)
            K = nd.array(idx[start:end])
            if self.Y is None:
                yield self.aug_imgs(self.X.take(K, 0))
            else:
                yield self.aug_imgs(self.X.take(K, 0)), self.Y.take(K, 0)
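
As a quick orientation (not part of the commit): a minimal usage sketch of SimpleDataset and AugLoader, assuming an HDF5 file at the default root with a 'cifar10' node; the batch size and label count are arbitrary.

    dataset = SimpleDataset('cifar10', numLabels=4000)
    train_iter = AugLoader(128, dataset.trainX, dataset.trainY, shuffle=True)
    valid_iter = AugLoader(128, dataset.valX, dataset.valY, shuffle=False)
    # Each training batch yields augmented image tensors and their labels
    for imgs, labels in train_iter:
        break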

lab/datatools.py

Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
import math

import numpy as np


class Loader(dict):
    """
    Methods
    ========
    With L an instance of this class:
    len(L):: returns the number of batches
    iter(L):: the data iterator itself

    Parameters
    =============
    type: 'train', 'test'

    Return
    ========
    An iterable of numpy-like batches
    """

    def __init__(self, batch_size, X, Y=None, shuffle=True, *args, **kwargs):
        '''
        X and Y are numpy-like; they may be HDF5 arrays
        '''
        super().__init__(*args, **kwargs)
        self.__dict__ = self
        self.batch_size = batch_size
        if shuffle:
            self.type = 'train'
        else:
            self.type = 'test'

        if not hasattr(X, 'take'):
            self.X = X[:]
        else:
            self.X = X
        self.nrows = len(self.X)
        if Y is not None:
            # Load Y into memory when it lacks `take`; otherwise keep it as-is.
            # The original left self.Y unset when Y already supported `take`,
            # which raised AttributeError later in __iter__.
            self.Y = Y[:] if not hasattr(Y, 'take') else Y
        else:
            self.Y = None

    def __iter__(self):
        idx = np.arange(self.nrows)
        if self.type == 'train':
            np.random.shuffle(idx)

        for start in range(0, self.nrows, self.batch_size):
            end = min(start + self.batch_size, self.nrows)
            K = idx[start:end].tolist()
            if self.Y is None:
                yield self.X.take(K, axis=0)
            else:
                yield self.X.take(K, axis=0), self.Y.take(K, axis=0)

    def __len__(self):
        # Round up, as the comment intended; round() is not a ceiling
        return math.ceil(self.nrows / self.batch_size)
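
For illustration only: a small synthetic run of Loader, showing the batching and the ceiling behaviour of len(); the arrays are made up.

    X = np.arange(20).reshape(10, 2)
    Y = np.arange(10)
    loader = Loader(batch_size=4, X=X, Y=Y, shuffle=False)
    print(len(loader))  # 3, i.e. ceil(10 / 4)
    for xb, yb in loader:
        print(xb.shape, yb.shape)  # (4, 2) (4,), (4, 2) (4,), (2, 2) (2,)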

lab/gluontools.py

Lines changed: 115 additions & 0 deletions
@@ -0,0 +1,115 @@
import time

from mxnet import metric, autograd
from mxnet.gluon import loss as gloss, Trainer
from gluoncv.utils import TrainingHistory  # visualization

from utils import make_dirs  # create several directories at once


class Tools:
    def __init__(self, datasetName):
        self._get_result_dir(datasetName)

    def _get_result_dir(self, datasetName):
        self.modelDir = f'models/{datasetName}'
        self.resultDir = f'results/{datasetName}'
        make_dirs(self.modelDir, self.resultDir)


def evaluate_loss(data_iter, net, ctx, loss):
    l_sum, n = 0.0, 0
    # loss = gloss.SoftmaxCrossEntropyLoss()
    for X, y in data_iter:
        y = y.as_in_context(ctx).astype('float32')  # the model outputs float32
        outputs = net(X.as_in_context(ctx))  # model outputs
        l_sum += loss(outputs, y).sum().asscalar()  # accumulate the total loss
        n += y.size  # count the samples
    return l_sum / n  # average loss


def test(valid_iter, net, ctx):
    val_metric = metric.Accuracy()
    for X, y in valid_iter:
        X = X.as_in_context(ctx)
        y = y.as_in_context(ctx).astype('float32')  # the model outputs float32
        outputs = net(X)
        val_metric.update(y, outputs)
    return val_metric.get()


def get_result_dirs(datasetName):
    tools = Tools(datasetName)
    return tools.modelDir, tools.resultDir


def train(ctx,
          loss,
          trainer,
          datasetName,
          modelName,
          net,
          train_iter,
          valid_iter,
          num_epochs,
          n_retrain_epoch=0):
    '''
    n_retrain_epoch is the epoch index that training resumes from
    '''
    train_metric = metric.Accuracy()
    train_history = TrainingHistory(['training-error', 'validation-error'])
    best_val_score = 0
    modelDir, resultDir = get_result_dirs(datasetName)
    for epoch in range(num_epochs):
        train_l_sum, n, start = 0.0, 0, time.time()  # start the timer
        train_metric.reset()
        for X, y in train_iter:
            X = X.as_in_context(ctx)
            y = y.as_in_context(ctx).astype('float32')  # the model outputs float32
            with autograd.record():  # record for gradient computation
                outputs = net(X)  # model outputs
                L = loss(outputs, y)
                l = L.mean()  # mean loss over the batch
            l.backward()  # backpropagation
            trainer.step(1)
            train_l_sum += L.sum().asscalar()  # total loss of this batch
            train_metric.update(y, outputs)  # update the training accuracy
            n += y.size
        _, train_acc = train_metric.get()
        time_s = "time {:.2f} sec".format(time.time() - start)  # stop the timer
        valid_loss = evaluate_loss(valid_iter, net, ctx, loss)  # average validation loss
        _, val_acc = test(valid_iter, net, ctx)  # validation accuracy
        epoch_s = (
            "epoch {:d}, train loss {:.5f}, valid loss {:.5f}, train acc {:.5f}, valid acc {:.5f}, ".
            format(n_retrain_epoch + epoch, train_l_sum / n, valid_loss,
                   train_acc, val_acc))
        print(epoch_s + time_s)
        train_history.update([1 - train_acc, 1 - val_acc])  # update the error curves
        train_history.plot(save_path=f'{resultDir}/{modelName}_history.png')  # refresh the plot
        if abs(train_acc - val_acc) > 0.3:  # stop on a large train/valid gap
            break
        if val_acc > best_val_score:  # save the better models
            best_val_score = val_acc
            net.save_parameters('{}/{:.4f}-{}-{:d}-best.params'.format(
                modelDir, best_val_score, modelName, n_retrain_epoch + epoch))


def train_fine_tuning(datasetName,
                      modelName,
                      learning_rate,
                      net,
                      train_iter,
                      valid_iter,
                      num_epochs,
                      n_retrain_epoch=0):
    import d2lzh as d2l
    ctx = d2l.try_all_gpus()[0]
    net.collect_params().reset_ctx(ctx)
    net.hybridize()
    loss = gloss.SoftmaxCrossEntropyLoss()
    trainer = Trainer(net.collect_params(), 'sgd', {
        'learning_rate': learning_rate,
        'wd': 0.001
    })
    train(ctx, loss, trainer, datasetName, modelName, net, train_iter,
          valid_iter, num_epochs, n_retrain_epoch)
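
A hedged sketch of how train_fine_tuning might be invoked; the gluoncv backbone and the two iterators are assumptions, not part of this commit.

    from gluoncv.model_zoo import get_model
    net = get_model('ResNet50_v2', pretrained=True)  # hypothetical backbone choice
    # train_iter / valid_iter would be AugLoader instances from dataset.py
    train_fine_tuning('cifar10', 'resnet50', learning_rate=0.01, net=net,
                      train_iter=train_iter, valid_iter=valid_iter,
                      num_epochs=30)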

lab/utils.py

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
import os


def make_dirs(*dir_names):
    '''
    Create several directories at once
    '''
    for dir_name in dir_names:
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
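
For example (the paths are illustrative), make_dirs accepts any number of directory names and creates each one only if it is missing:

    make_dirs('models/cifar10', 'results/cifar10')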
