Deep Learning Practice: the Cats vs Dogs Dataset
一.下载数据集
数据集来自kaggle 数据集的Dogs vs Cats数据集
百度云盘下载地址
链接:https://pan.baidu.com/s/177uL...
Extraction code: 9j40
2. Split the data
1. Create the following folders (a minimal sketch for creating them programmatically follows the listing):
- train
  - cats
  - dogs
- val
  - cats
  - dogs
- test
  - cats
  - dogs
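The copy functions below assume these directories already exist. Here is a minimal sketch for creating them, assuming the same base path E:\mldata\dogvscat used throughout the script:

import os

base_dir = r'E:\mldata\dogvscat'
for split in ('train', 'val', 'test'):
    for cls in ('cats', 'dogs'):
        # exist_ok=True makes the script safe to re-run
        os.makedirs(os.path.join(base_dir, split, cls), exist_ok=True)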
def data_cats_processing():
    base_train_path = r'E:\mldata\dogvscat\data\train'
    base_dest_train_path = r'E:\mldata\dogvscat\train\cats'
    base_dest_test_path = r'E:\mldata\dogvscat\test\cats'
    base_dest_val_path = r'E:\mldata\dogvscat\val\cats'
    # Training set: cat.0.jpg .. cat.9999.jpg
    cat_fnames = ['cat.{}.jpg'.format(i) for i in range(10000)]
    for fname in cat_fnames:
        src_path = os.path.join(base_train_path, fname)
        dest_path = os.path.join(base_dest_train_path, fname)
        shutil.copyfile(src_path, dest_path)
    # Validation set: cat.10001.jpg .. cat.11500.jpg
    cat_fnames = ['cat.{}.jpg'.format(i) for i in range(10001, 11501)]
    for fname in cat_fnames:
        src_path = os.path.join(base_train_path, fname)
        dest_path = os.path.join(base_dest_val_path, fname)
        shutil.copyfile(src_path, dest_path)
    # Test set: cat.11501.jpg .. cat.12499.jpg
    cat_fnames = ['cat.{}.jpg'.format(i) for i in range(11501, 12500)]
    for fname in cat_fnames:
        src_path = os.path.join(base_train_path, fname)
        dest_path = os.path.join(base_dest_test_path, fname)
        shutil.copyfile(src_path, dest_path)

def data_dog_processing():
    base_train_path = r'E:\mldata\dogvscat\data\train'
    base_dest_train_path = r'E:\mldata\dogvscat\train\dogs'
    base_dest_test_path = r'E:\mldata\dogvscat\test\dogs'
    base_dest_val_path = r'E:\mldata\dogvscat\val\dogs'
    # Training set: dog.0.jpg .. dog.9999.jpg
    dog_fnames = ['dog.{}.jpg'.format(i) for i in range(10000)]
    for fname in dog_fnames:
        src_path = os.path.join(base_train_path, fname)
        dest_path = os.path.join(base_dest_train_path, fname)
        shutil.copyfile(src_path, dest_path)
    # Validation set: dog.10001.jpg .. dog.11500.jpg
    dog_fnames = ['dog.{}.jpg'.format(i) for i in range(10001, 11501)]
    for fname in dog_fnames:
        src_path = os.path.join(base_train_path, fname)
        dest_path = os.path.join(base_dest_val_path, fname)
        shutil.copyfile(src_path, dest_path)
    # Test set: dog.11501.jpg .. dog.12499.jpg
    dog_fnames = ['dog.{}.jpg'.format(i) for i in range(11501, 12500)]
    for fname in dog_fnames:
        src_path = os.path.join(base_train_path, fname)
        dest_path = os.path.join(base_dest_test_path, fname)
        shutil.copyfile(src_path, dest_path)
3. Read the data in batches
Code:
def datagen():
    # Rescale pixel values from [0, 255] to [0, 1]
    train_datagen = ImageDataGenerator(rescale=1 / 255.)
    test_datagen = ImageDataGenerator(rescale=1 / 255.)
    train_dir = r'E:\mldata\dogvscat\train'
    val_dir = r'E:\mldata\dogvscat\val'
    train_datagen = train_datagen.flow_from_directory(
        train_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary'
    )
    val_datagen = test_datagen.flow_from_directory(
        val_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary'
    )
    return train_datagen, val_datagen
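To sanity-check what the generators yield, you can pull a single batch; the variable names below are just for illustration:

train_gen, val_gen = datagen()
x_batch, y_batch = next(train_gen)
print(x_batch.shape)  # expected (20, 150, 150, 3): 20 RGB images of 150x150
print(y_batch.shape)  # expected (20,): one 0/1 label per image, because class_mode='binary'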
4. Build the neural network models
Model 1: a basic CNN. It underfits after training.
def base_model1():
    # acc 0.8255, val_acc 0.7940 -> underfitting
    # Possible fixes:
    # 1. Try a better optimizer
    # 2. Try a larger network
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=RMSprop(learning_rate=1e-4), metrics=['acc'])
    return model
Model 2: use Adam as the optimizer.
def create_model2():
    # Uses Adam
    # acc: 0.8537 - val_acc: 0.7920
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=1e-4), metrics=['acc'])
    return model
5. Train the model
# Load the data
train_datagen, val_datagen = datagen()
# Build the model
model = create_model2()
# Create the log directory for TensorBoard
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# Create the TensorBoard callback
tensorboard_callback = callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
# Start training
model.fit_generator(train_datagen,
                    steps_per_epoch=100,
                    epochs=30,
                    validation_data=val_datagen,
                    validation_steps=50,
                    callbacks=[tensorboard_callback])
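Note that fit_generator is deprecated in recent TensorFlow 2 releases. Assuming TF 2.1 or later, the same training run can be written with model.fit, which accepts generators directly:

model.fit(train_datagen,
          steps_per_epoch=100,
          epochs=30,
          validation_data=val_datagen,
          validation_steps=50,
          callbacks=[tensorboard_callback])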
Complete code
import os, shutil
import datetime

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, MaxPooling2D, Conv2D, Flatten, Dropout
from tensorflow.keras.optimizers import RMSprop, Adam, SGD
from tensorflow.keras import callbacks
from tensorflow.keras.applications import VGG16


# Split the dataset: 25000 images in total
#   training set:   20000
#   validation set:  3000
#   test set:        2000
def data_cats_processing():
    base_train_path = r'E:\mldata\dogvscat\data\train'
    base_dest_train_path = r'E:\mldata\dogvscat\train\cats'
    base_dest_test_path = r'E:\mldata\dogvscat\test\cats'
    base_dest_val_path = r'E:\mldata\dogvscat\val\cats'
    cat_fnames = ['cat.{}.jpg'.format(i) for i in range(10000)]
    for fname in cat_fnames:
        src_path = os.path.join(base_train_path, fname)
        dest_path = os.path.join(base_dest_train_path, fname)
        shutil.copyfile(src_path, dest_path)
    cat_fnames = ['cat.{}.jpg'.format(i) for i in range(10001, 11501)]
    for fname in cat_fnames:
        src_path = os.path.join(base_train_path, fname)
        dest_path = os.path.join(base_dest_val_path, fname)
        shutil.copyfile(src_path, dest_path)
    cat_fnames = ['cat.{}.jpg'.format(i) for i in range(11501, 12500)]
    for fname in cat_fnames:
        src_path = os.path.join(base_train_path, fname)
        dest_path = os.path.join(base_dest_test_path, fname)
        shutil.copyfile(src_path, dest_path)


def data_dog_processing():
    base_train_path = r'E:\mldata\dogvscat\data\train'
    base_dest_train_path = r'E:\mldata\dogvscat\train\dogs'
    base_dest_test_path = r'E:\mldata\dogvscat\test\dogs'
    base_dest_val_path = r'E:\mldata\dogvscat\val\dogs'
    dog_fnames = ['dog.{}.jpg'.format(i) for i in range(10000)]
    for fname in dog_fnames:
        src_path = os.path.join(base_train_path, fname)
        dest_path = os.path.join(base_dest_train_path, fname)
        shutil.copyfile(src_path, dest_path)
    dog_fnames = ['dog.{}.jpg'.format(i) for i in range(10001, 11501)]
    for fname in dog_fnames:
        src_path = os.path.join(base_train_path, fname)
        dest_path = os.path.join(base_dest_val_path, fname)
        shutil.copyfile(src_path, dest_path)
    dog_fnames = ['dog.{}.jpg'.format(i) for i in range(11501, 12500)]
    for fname in dog_fnames:
        src_path = os.path.join(base_train_path, fname)
        dest_path = os.path.join(base_dest_test_path, fname)
        shutil.copyfile(src_path, dest_path)


def datagen():
    # Training generator with data augmentation
    train_datagen = ImageDataGenerator(
        rescale=1. / 255,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True
    )
    # Validation generator: rescaling only, no augmentation
    test_datagen = ImageDataGenerator(rescale=1 / 255.)
    train_dir = r'E:\mldata\dogvscat\train'
    val_dir = r'E:\mldata\dogvscat\val'
    train_datagen = train_datagen.flow_from_directory(
        train_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary'
    )
    val_datagen = test_datagen.flow_from_directory(
        val_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary'
    )
    return train_datagen, val_datagen


def base_model1():
    # acc 0.8255, val_acc 0.7940 -> underfitting
    # Possible fixes:
    # 1. Try a better optimizer
    # 2. Try a larger network
    # 3. More data / data augmentation
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=RMSprop(learning_rate=1e-4), metrics=['acc'])
    return model


def create_model2():
    # Uses Adam
    # acc: 0.8537 - val_acc: 0.7920
    # Possible fixes:
    # 1. Try a better optimizer
    # 2. Try a larger network
    # 3. More data / data augmentation
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=1e-4), metrics=['acc'])
    return model


def create_model3():
    # Uses Adam plus the data augmentation generator
    # acc: 0.7487 - val_acc: 0.7760
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=1e-4), metrics=['acc'])
    return model


def create_model4():
    # Uses Adam with a larger network
    # acc: 0.7366 - val_loss: 0.5200 - val_acc: 0.7350
    # Without data augmentation:
    # acc: 0.8382 - val_loss: 0.4190 - val_acc: 0.8110
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(256, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(1024, activation='relu'))
    model.add(Dense(512, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=1e-4), metrics=['acc'])
    return model


def create_model5():
    # Uses a VGG-style network
    # acc: 0.8466 - val_loss: 0.3708 - val_acc: 0.8320
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=1e-4), metrics=['acc'])
    return model


def vgg_model():
    # Transfer learning with a pretrained VGG16 backbone
    # acc: 0.8466 - val_loss: 0.3708 - val_acc: 0.8320
    top_model = Sequential()
    top_model.add(Flatten())
    top_model.add(Dense(256, activation='relu'))
    top_model.add(Dropout(0.5))
    top_model.add(Dense(1, activation='sigmoid'))
    base_model = VGG16(weights='imagenet',
                       include_top=False,
                       input_shape=(150, 150, 3))
    model = Model(inputs=base_model.input, outputs=top_model(base_model.output))
    # Freeze every layer except the custom top, so only the new classifier is trained
    for layer in model.layers[:-1]:
        layer.trainable = False
    # binary_crossentropy matches the single sigmoid output and class_mode='binary' labels
    model.compile(loss='binary_crossentropy',
                  optimizer=SGD(learning_rate=1e-4, momentum=0.9),
                  metrics=['accuracy'])
    model.summary()
    return model


if __name__ == '__main__':
    # Run once to split the raw data into train/val/test:
    # data_cats_processing()
    # data_dog_processing()
    # Load the data
    train_datagen, val_datagen = datagen()
    # Build the model
    model = create_model2()
    # Create the log directory for TensorBoard
    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    # Create the TensorBoard callback
    tensorboard_callback = callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
    # Start training
    model.fit_generator(train_datagen,
                        steps_per_epoch=100,
                        epochs=30,
                        validation_data=val_datagen,
                        validation_steps=50,
                        callbacks=[tensorboard_callback])
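The test split created in step 2 is not used above. Here is a minimal sketch for evaluating the trained model on it, assuming the test directory path mirrors the train/val paths and the same generator settings as validation:

test_datagen = ImageDataGenerator(rescale=1 / 255.)
test_gen = test_datagen.flow_from_directory(
    r'E:\mldata\dogvscat\test',
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary'
)
# evaluate returns the loss and the compiled metric (accuracy) on the held-out test set
test_loss, test_acc = model.evaluate(test_gen)
print('test_acc:', test_acc)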