图像识别分类 代码

本文整理了一套适用于各类图像分类任务的通用代码,放在博客上方便随时复制使用。

导包

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sklearn
import sys
import tensorflow as tf
import time

from tensorflow import keras

# Report the interpreter and library versions so a run can be reproduced.
print(tf.__version__)
print(sys.version_info)
for lib in (mpl, np, pd, sklearn, tf, keras):
    print(lib.__name__, lib.__version__)

# import PIL
# import IPython
# import kerastuner as kt

导入文件

1
2
3
4
5
6
7
8
9
10
11
# Root folders of the three dataset splits; each is listed below so its
# sub-folders (presumably one per class) can be checked by eye.
train_dir = "G:/dataset/rgbdsm/rgdsm/256_256/three grades-newGan/train"
test_dir = "G:/dataset/rgbdsm/rgdsm/256_256/three grades-Gan/val"
val_dir = "G:/dataset/rgbdsm/rgdsm/256_256/three grades-newGan/test"
# NOTE(review): test_dir points at a ".../val" folder (and a different
# "three grades-Gan" root) while val_dir points at ".../test" -- confirm
# that this pairing is intentional.

dataset_dirs = (train_dir, test_dir, val_dir)

# First confirm that every split exists ...
for directory in dataset_dirs:
    print(os.path.exists(directory))

# ... then show what each split contains.
for directory in dataset_dirs:
    print(os.listdir(directory))

设置参数

1
2
3
4
5
6
7
8
9
# Input image geometry (pixels) and number of colour channels.
width = height = 256
channels = 3

# Training schedule and problem size.
epochs = 40
num_classes = 3
batch_size = 32

# Checkpoint file name template; the placeholders are filled in by the
# Keras ModelCheckpoint callback with the epoch number and validation accuracy.
save_path = './save_weights/3_{epoch:02d}-{val_accuracy:.2f}.h5'

数据增强

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# All three pipelines apply the DenseNet input normalisation so that the
# images match what the pretrained backbone expects.
densenet_preprocess = keras.applications.densenet.preprocess_input

# Options shared by every flow_from_directory call; only `shuffle` differs.
flow_options = dict(
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,
    color_mode="rgb",
    class_mode="categorical",
)

train_datagen = keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=densenet_preprocess,
    # Optional alternatives / augmentations, currently disabled:
    # rescale=1./255,
    # preprocessing_function=keras.applications.efficientnet.preprocess_input,
    # rotation_range=3,
    # width_shift_range=0.02,
    # height_shift_range=0.02,
    # zoom_range=0.1,
    # horizontal_flip=True,
    # vertical_flip=True,
    # brightness_range=(0.95, 1.05),
    # fill_mode='nearest',
    # validation_split=0.2,
)
# Training batches are shuffled.
train_generator = train_datagen.flow_from_directory(
    train_dir, shuffle=True, **flow_options)

val_datagen = keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=densenet_preprocess,
)
# Validation order is fixed (no shuffling).
val_generator = val_datagen.flow_from_directory(
    val_dir, shuffle=False, **flow_options)

test_datagen = keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=densenet_preprocess,
)
# Test order is fixed as well, so predictions line up with `.classes`.
test_generator = test_datagen.flow_from_directory(
    test_dir, shuffle=False, **flow_options)
# Number of images found in each split.
train_num = train_generator.samples
val_num = val_generator.samples
test_num = test_generator.samples

print(train_num, val_num, test_num)
# Mapping from class folder name to the integer label used by the generator.
print(train_generator.class_indices)

# Pull two batches to sanity-check the tensor shapes.
# The built-in next() is used because newer Keras releases no longer expose
# a public `.next()` method on directory iterators.
for i in range(2):
    x, y = next(train_generator)
    print(x.shape, y.shape)
    # print(y)

展示图片

1
2
3
4
5
6
7
8
9
10
11
12
class_names = ['0-30', '31-60', '61-100']

# The generator output has already been through densenet.preprocess_input
# (scale to [0, 1], then normalise each channel with the ImageNet mean/std).
# Dividing by 255 again would render almost black images, so the
# normalisation is undone before display instead.
imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])

# Index of the sample taken from each batch.
sample_index = 1

plt.figure(figsize=(8, 8))
for i in range(16):
    # Fetch the batch once and reuse it for both the image and its label.
    batch_images, batch_labels = train_generator[i]
    image = np.clip(batch_images[sample_index] * imagenet_std + imagenet_mean,
                    0.0, 1.0)

    plt.subplot(4, 4, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(image)
    plt.xlabel(class_names[np.argmax(batch_labels[sample_index])])
plt.show()

多分类平衡数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
filenames = tf.io.gfile.glob(str(train_dir + '/*/*'))

# Count images per class by the name of their parent directory. A substring
# test on the whole path would also match file names that happen to contain
# a class label.
class_dirs = [os.path.basename(os.path.dirname(filename))
              for filename in filenames]

COUNT_1 = class_dirs.count("0-30")
print("0-30 images count in training set: " + str(COUNT_1))

COUNT_2 = class_dirs.count("31-60")
print("31-60 images count in training set: " + str(COUNT_2))

COUNT_3 = class_dirs.count("61-100")
print("61-100 images count in training set: " + str(COUNT_3))

# Balance the dataset: each class is weighted inversely to its frequency,
# scaled so that a perfectly balanced dataset gives every class weight 1.
weight_for_0 = (1 / COUNT_1) * (train_num) / 3.0
weight_for_1 = (1 / COUNT_2) * (train_num) / 3.0
weight_for_2 = (1 / COUNT_3) * (train_num) / 3.0

# Keys are the integer class indices passed to model.fit(class_weight=...).
class_weight = {0: weight_for_0, 1: weight_for_1, 2: weight_for_2}

print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))
print('Weight for class 2: {:.2f}'.format(weight_for_2))

回调函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Import from tensorflow.keras, matching the rest of the script; mixing the
# standalone `keras` package with `tf.keras` objects can fail at run time.
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.callbacks import TensorBoard

# Multiply the learning rate by 0.2 when val_loss has not improved for
# 3 epochs, but never go below 1e-6.
lr_callback = ReduceLROnPlateau(monitor='val_loss',
                                patience=3,
                                verbose=1,
                                factor=0.2,
                                min_lr=0.000001)
# Save the full model to `save_path`, keeping only checkpoints that improve
# val_accuracy. `save_freq='epoch'` replaces the deprecated `period=1`.
checkpoint = ModelCheckpoint(filepath=save_path,
                             monitor='val_accuracy',
                             save_weights_only=False,
                             save_best_only=True,
                             mode='auto',
                             save_freq='epoch')
tbCallBack = TensorBoard(log_dir='g:/logs')
# Callbacks handed to model.fit; TensorBoard logging is currently disabled.
callbacks = [
    lr_callback,
    checkpoint,
    # tbCallBack
]

构建模型

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# For a keras-tuner search, use the signature `def create_model(hp):` instead
# and enable the commented `hp.*` lines below.
def create_model():
    """Build and compile a DenseNet201-based classifier.

    The ImageNet-pretrained DenseNet201 backbone (without its classification
    head, fully trainable) is followed by global average pooling, a 32-unit
    ReLU layer, dropout and a softmax layer with `num_classes` outputs.

    Returns:
        A compiled `tf.keras.Sequential` model (Adam with learning rate 1e-4,
        categorical cross-entropy loss, accuracy metric).
    """
    # Keras image tensors are laid out (height, width, channels).
    pretrained_model = tf.keras.applications.DenseNet201(
        weights='imagenet',
        input_shape=(height, width, channels),
        include_top=False)
    pretrained_model.trainable = True

    # hp_units = hp.Int('units', min_value=32, max_value=512, step=32)
    model = tf.keras.Sequential([
        pretrained_model,
        # Optional extras that can be inserted here: Conv2D / MaxPool2D
        # stacks, tf.keras.layers.BatchNormalization(),
        # tf.keras.layers.Dropout(0.5).
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(units=32, activation='relu'),
        # tf.keras.layers.Dense(units=256, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(
            num_classes,
            activation='softmax',
            # kernel_regularizer='l2'
        ),
    ])

    # hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
        # Alternatives: optimizer='sgd', optimizer='adam'
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model

训练模型

1
2
3
4
5
6
7
8
9
10
11
12
model = create_model()
model.summary()

# len(generator) is the number of batches per pass, including the final
# partial batch. Floor division by batch_size would silently skip the
# remaining samples and yields 0 steps when a split is smaller than one batch.
history1 = model.fit(train_generator,
                     steps_per_epoch=len(train_generator),
                     validation_data=val_generator,
                     validation_steps=len(val_generator),
                     epochs=epochs,
                     verbose=1,
                     # class_weight=class_weight,
                     callbacks=callbacks
                     )
# To inspect the logs, run in a terminal: tensorboard --logdir=g:/logs

绘制曲线

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
def display_training_curves(training, validation, title, subplot):
    """Plot a training curve and a validation curve on one subplot.

    Args:
        training: Per-epoch values of the metric on the training data.
        validation: Per-epoch values of the metric on the validation data.
        title: Metric name, used for the subplot title and the y-axis label.
        subplot: Matplotlib three-digit subplot code (e.g. 211). A code whose
            last digit is 1 also creates the figure.
    """
    starts_new_figure = subplot % 10 == 1
    if starts_new_figure:
        # The first panel sets up the figure for the whole series of calls.
        plt.subplots(figsize=(10, 10), facecolor='#F0F0F0')
        plt.tight_layout()

    axes = plt.subplot(subplot)
    axes.set_facecolor('#F8F8F8')
    for curve in (training, validation):
        axes.plot(curve)
    axes.set_title('model ' + title)
    axes.set_ylabel(title)
    # axes.set_ylim(0.28, 1.05)
    axes.set_xlabel('epoch')
    axes.legend(['train', 'valid.'])
train_log = history1.history

# Mean validation accuracy over the last five epochs.
final_accuracy = train_log["val_accuracy"][-5:]
print("FINAL ACCURACY MEAN-5: ", np.mean(final_accuracy))

# Plot accuracy (top panel) and loss (bottom panel); the first epoch is
# left out of both curves.
for panel, metric in ((211, 'accuracy'), (212, 'loss')):
    display_training_curves(train_log[metric][1:],
                            train_log['val_' + metric][1:],
                            metric,
                            panel)

评估模型

1
2
3
4
5
6
7
# To evaluate a saved checkpoint instead of the in-memory model:
# file_path = './save_weights/3_17-0.84.h5'
# model.load_weights(file_path)

# Despite the `val_` prefix, these values are measured on the test generator.
test_metrics = model.evaluate(test_generator)
val_loss, val_acc = test_metrics

print('\nTest accuracy:', val_acc)
print('\nTest loss:', val_loss)

绘制混淆矩阵

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from sklearn.metrics import confusion_matrix
import itertools


def plot_confusion_matrix(cm, target_names, title='Confusion matrix', cmap=None, normalize=False):
    """Draw a confusion matrix as an annotated heat map.

    Args:
        cm: Square confusion matrix of counts (rows are true labels, columns
            are predicted labels).
        target_names: Class names used as tick labels, or None for no labels.
        title: Figure title.
        cmap: Matplotlib colormap; defaults to 'Blues'.
        normalize: If True, show each row as fractions of that row's total
            instead of raw counts.
    """
    cm = np.asarray(cm)

    # Accuracy and error rate are always computed from the raw counts.
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if normalize:
        # keepdims=True makes the row totals a column vector, so every row is
        # divided by its own total rather than each column by a row total.
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class with no true samples would give 0/0; keep its row at zero.
        safe_totals = np.where(row_totals == 0, 1, row_totals)
        cm = np.round(cm.astype('float32') / safe_totals, 2)

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    plt.figure(figsize=(6, 4))
    # Draw the same values that are printed in the cells, so the colours and
    # the text-colour threshold below agree.
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells darker than the threshold get white text, the others black.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.2f}" if normalize else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel("Predicted label\naccuracy={:0.4f}\n misclass={:0.4f}".format(accuracy, misclass))
    plt.show()

# Class names in generator index order; the last one was misspelled '60-100'.
labels = ['0-30', '31-60', '61-100']

# Predict class probabilities for the whole test set. `steps` must be passed
# by keyword because the second positional argument of predict() is
# `batch_size`; len(test_generator) covers every batch exactly once.
Y_pred = model.predict(test_generator, steps=len(test_generator))
# Convert the probabilities to predicted class indices.
Y_pred_classes = np.argmax(Y_pred, axis=1)
# Compute the confusion matrix. test_generator was created with
# shuffle=False, so `.classes` is in the same order as the predictions.
confusion_mtx = confusion_matrix(y_true=test_generator.classes, y_pred=Y_pred_classes)
# Plot the row-normalised confusion matrix.
plot_confusion_matrix(confusion_mtx, normalize=True, target_names=labels)