as tf
from tensorflow import keras
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import glob
import os
# Collect the path of every .jpg file directly under ./dc/maogou.
train_image_path = glob.glob("./dc/maogou/*.jpg")
# Bare expression: only has a visible effect when run as a notebook cell
# (shows how many files were found).
len(train_image_path)

# Peek at ten paths, positions 995-1004 of the list.
train_image_path[995:1005]

例子通过提取图片名字拆分定义该图是猫是狗
# Worked example: derive the class label from the file name.
# File names look like "cat.999.jpg" / "dog.11500.jpg"; the text before the
# first "." is the class name, and the label is 1 for cat, 0 otherwise.
# Path separators are normalised to "/" before taking the last component, so
# the same expression handles both backslash- and slash-separated paths.
p = "./dc/maogou\\cat.999.jpg"
p.replace("\\", "/").split("/")[-1].split(".")[0]

int(p.replace("\\", "/").split("/")[-1].split(".")[0] == "cat")

p = "./dc/maogou\\dog.11500.jpg"
p.replace("\\", "/").split("/")[-1].split(".")[0]
int(p.replace("\\", "/").split("/")[-1].split(".")[0] == "cat")

# One label per training path, in the same order as train_image_path.
train_image_label = [
    int(p.replace("\\", "/").split("/")[-1].split(".")[0] == "cat")
    for p in train_image_path
]
train_image_label[995:1005]

def load_preprosess_image(path, lable):
    """Read one JPEG file and return a normalised ``(image, label)`` pair.

    The file at ``path`` is decoded to 3 channels, resized to 256x256, cast to
    float32 and divided by 255. ``lable`` is reshaped to shape ``[1]``.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [256, 256])
    scaled = tf.cast(resized, tf.float32) / 255
    return scaled, tf.reshape(lable, [1])
tf.image.convert_image_dtype方法将一个uint类型的tensor转换为float类型时,该方法会自动对数据进行归一化处理(如果是float类型图片使用该方法则不会归一化处理),将数据缩放到0-1范围内,如果没有注意到这点,之后在进行网络训练时会发现网络不收敛、不训练。
# Training input pipeline built from (path, label) pairs.
BATCH_SIZE = 64
train_count = len(train_image_path)
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_image_ds = tf.data.Dataset.from_tensor_slices((train_image_path, train_image_label))
# Shuffle BEFORE decoding: the shuffle buffer holds `train_count` elements, so
# shuffling the small (path, label) pairs keeps memory use tiny, whereas
# shuffling after `map` would buffer every decoded 256x256x3 float image.
train_image_ds = train_image_ds.shuffle(train_count)
train_image_ds = train_image_ds.map(load_preprosess_image, num_parallel_calls=AUTOTUNE)
train_image_ds

train_image_ds = train_image_ds.batch(BATCH_SIZE)
# Overlap preprocessing of upcoming batches with consumption of the current one.
train_image_ds = train_image_ds.prefetch(AUTOTUNE)
例子如果是猫 = 1 是狗 = 0

# Test input pipeline: same preprocessing as training, but no shuffling.
test_image_path = glob.glob("./dc/maogou_test/*.jpg")
# Label is 1 when the file name starts with "cat", else 0. Separators are
# normalised to "/" so backslash- and slash-separated paths both parse.
test_image_label = [
    int(p.replace("\\", "/").split("/")[-1].split(".")[0] == "cat")
    for p in test_image_path
]
test_image_ds = tf.data.Dataset.from_tensor_slices((test_image_path, test_image_label))
test_image_ds = test_image_ds.map(load_preprosess_image, num_parallel_calls=AUTOTUNE)
test_image_ds = test_image_ds.batch(BATCH_SIZE)
test_image_ds = test_image_ds.prefetch(AUTOTUNE)
# Baseline CNN: four 3x3 convolution stages (64 -> 128 -> 256 -> 512 filters)
# with max pooling after the first three, global average pooling, then a dense
# head ending in a single un-activated output unit.
model = keras.Sequential(
    [
        keras.layers.Conv2D(
            filters=64,
            kernel_size=(3, 3),
            activation="relu",
            input_shape=(256, 256, 3),
        ),
        keras.layers.MaxPooling2D(),
        keras.layers.Conv2D(filters=128, kernel_size=(3, 3), activation="relu"),
        keras.layers.MaxPooling2D(),
        keras.layers.Conv2D(filters=256, kernel_size=(3, 3), activation="relu"),
        keras.layers.MaxPooling2D(),
        keras.layers.Conv2D(filters=512, kernel_size=(3, 3), activation="relu"),
        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Dense(units=256, activation="relu"),
        keras.layers.Dense(units=1),
    ]
)
model.summary()

例子调用模型查看预测结果 (未训练模型)


# Optimizer used by train_step.
optimizer = tf.keras.optimizers.Adam()

# Running metrics, one loss mean and one accuracy for each of the training and
# test passes; the epoch loop reads and resets them once per epoch.
epoch_loss_avg = tf.keras.metrics.Mean(name="train_loss")
train_accuracy = tf.keras.metrics.Accuracy(name="acc")
epoch_loss_avg_test = tf.keras.metrics.Mean(name="test_loss")
test_accuracy = tf.keras.metrics.Accuracy(name="acc")
def train_step(model, images, labels):
    """Run one optimisation step on a batch and update the training metrics.

    Args:
        model: Keras model whose output is treated as logits.
        images: batch of input images.
        labels: batch of 0/1 labels.

    Side effects: applies gradients through the module-level ``optimizer`` and
    updates ``epoch_loss_avg`` and ``train_accuracy``.
    """
    with tf.GradientTape() as t:
        # training=True makes layers with train/inference modes (e.g.
        # BatchNormalization) use batch statistics and update their moving
        # averages; without it they would run in inference mode here.
        pred = model(images, training=True)
        # The loss must be computed inside the tape so it can be differentiated.
        loss_step = tf.keras.losses.BinaryCrossentropy(from_logits=True)(labels, pred)
    grads = t.gradient(loss_step, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    epoch_loss_avg(loss_step)
    # pred holds logits, so "logit > 0" is the same as "probability > 0.5".
    train_accuracy(labels, tf.cast(pred > 0, tf.int32))
def test_step(model, images, labels):
    """Evaluate one batch and update the test metrics (no weight updates).

    Args:
        model: Keras model whose output is treated as logits.
        images: batch of input images.
        labels: batch of 0/1 labels.

    Side effects: updates ``epoch_loss_avg_test`` and ``test_accuracy``.
    """
    # Call the model directly instead of model.predict(): predict() is meant
    # for large inputs and adds per-call overhead when invoked once per batch
    # inside a Python loop. training=False selects inference behaviour.
    pred = model(images, training=False)
    loss_step = tf.keras.losses.BinaryCrossentropy(from_logits=True)(labels, pred)
    epoch_loss_avg_test(loss_step)
    # pred holds logits, so "logit > 0" is the same as "probability > 0.5".
    test_accuracy(labels, tf.cast(pred > 0, tf.int32))
# Per-epoch history of the four tracked metrics.
train_loss_results, train_acc_results = [], []
test_loss_results, test_acc_results = [], []

num_epochs = 10
for epoch in range(num_epochs):
    # Training pass: one ">" is printed per batch as a progress indicator.
    for imgs_, labels_ in train_image_ds:
        train_step(model, imgs_, labels_)
        print(">", end="")
    print()
    train_loss = epoch_loss_avg.result()
    train_acc = train_accuracy.result()
    train_loss_results.append(train_loss)
    train_acc_results.append(train_acc)

    # Evaluation pass over the test dataset.
    for imgs_, labels_ in test_image_ds:
        test_step(model, imgs_, labels_)
    test_loss = epoch_loss_avg_test.result()
    test_acc = test_accuracy.result()
    test_loss_results.append(test_loss)
    test_acc_results.append(test_acc)

    summary_template = "Epoch:{}--:loss:{:.3f}--accuracy:{:.3f}=>test_loss:{:.3f}--test_accuracy:{:.3f}"
    print(summary_template.format(epoch + 1, train_loss, train_acc, test_loss, test_acc))

    # Clear the accumulators so the next epoch starts from zero.
    for metric in (epoch_loss_avg, train_accuracy, epoch_loss_avg_test, test_accuracy):
        metric.reset_states()

模型优化解决欠拟合及图像增强解决过拟合

def load_preprosess_image(path, lable):
    """Read one JPEG file and return a randomly augmented ``(image, label)`` pair.

    The image is decoded to 3 channels, resized to 360x360, randomly cropped to
    256x256, randomly flipped horizontally and vertically, scaled to [0, 1],
    then given a random brightness shift and contrast change. ``lable`` is
    reshaped to shape ``[1]``.

    NOTE(review): the augmentation is random on every call, so this version is
    presumably meant for the training dataset only - confirm before mapping it
    over test data.
    """
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [360, 360])
    image = tf.image.random_crop(image, [256, 256, 3])
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    # Scale to [0, 1] BEFORE the brightness change: random_brightness adds its
    # delta directly to float pixel values, so a max delta of 0.5 applied on a
    # 0-255 scale would be practically invisible.
    image = tf.cast(image, tf.float32) / 255
    image = tf.image.random_brightness(image, 0.5)
    image = tf.image.random_contrast(image, 0, 1)
    # Brightness/contrast changes can push values outside [0, 1]; clamp them.
    image = tf.clip_by_value(image, 0.0, 1.0)
    lable = tf.reshape(lable, [1])
    return image, lable

# Deeper CNN: five stages of two 3x3 convolutions each (64, 128, 256, 512,
# 1024 filters), every convolution followed by BatchNormalization, with max
# pooling between stages, then global average pooling and a dense head ending
# in a single un-activated output unit.
# input_shape is given on the first layer only; the original also repeated it
# on the second convolution, where it is redundant.
model = keras.Sequential([
    tf.keras.layers.Conv2D(64, (3, 3), input_shape=(256, 256, 3), activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(64, (3, 3), activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(128, (3, 3), activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(128, (3, 3), activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(256, (3, 3), activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(256, (3, 3), activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(512, (3, 3), activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(512, (3, 3), activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(1024, (3, 3), activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(1024, (3, 3), activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dense(1),
])
test 数据集无需图像增强

流行的CNN架构
VGG
VGG全称是Visual Geometry Group,属于牛津大学科学工程系,其发布了一系列以VGG开头的卷积网络模型,可以应用在人脸识别、图像分类等方面,分别从VGG16~VGG19。
VGG研究卷积网络深度的初衷是想搞清楚卷积网络深度是如何影响大规模图像分类与识别的精度和准确率的,最初是VGG-16,号称非常深的卷积网络,全称为(VGG-Very-Deep-16 CNN)。
VGG在加深网络层数同时为了避免参数过多,在所有层都采用3x3的小卷积核,卷积层步长被设置为1。VGG的输入被设置为224x224大小的RGB图像,在训练集图像上对所有图像计算RGB均值,然后把图像作为输入传入VGG卷积网络,使用3x3或者1x1的filter,卷积步长被固定1。
VGG全连接层有3层,根据卷积层+全连接层总数目的不同可以从VGG11 ~ VGG19,最少的VGG11有 8个卷积层与3个全连接层,最多的VGG19有16个卷积层+3个全连接层,此外VGG网络并不是在每个卷积层后面跟上一个池化层,而是总共5个池化层,分布在不同的卷积层之下。
conv表示卷积层
FC表示全连接层(dense)
Conv3 表示卷积层使用3x3 filters
conv3-64表示 深度64
maxpool表示最大池化
在实际处理中还可以对第一个全连接层改为7x7的卷积网络,后面两个全连接层改为1x1的卷积网络,这个整个VGG就变成一个全卷积网络FCN。
VGG在加深CNN网络深度方面首先做出了贡献,但是VGG也有自身的局限性,不能无限制的加深网络,在网络加深到一定层数之后就会出现训练效果退化、梯度消失或者梯度爆炸等问题,总的来说VGG在刚提出的时候也是风靡一时,在ImageNet竞赛数据集上都取得了不错的效果。