AlexNet Source Code Implementation (Based on TensorFlow 2.0)


AlexNet was designed by Alex Krizhevsky, a student of Geoffrey Hinton, and won the 2012 ImageNet competition. In the years that followed, ever deeper networks were proposed, such as the excellent VGG and GoogLeNet. Compared with traditional machine-learning classification algorithms, AlexNet's performance was already outstanding.

AlexNet Model Architecture

[Figure: AlexNet model architecture diagram]

AlexNet implementation:

import os
import cv2
import numpy as np
import tensorflow as tf
from random import shuffle
from tensorflow.keras.models import load_model
from tensorflow.keras import Sequential, layers, optimizers, losses, metrics

labels_num = 2  # number of classes

# Load the dataset: read every image under each class subfolder, then split 70/30 into train/test
def load_image(path, shape):
    img_list = []
    label_list = []
    dir_counter = 0
    # Read all jpg files in every subfolder under `path`; the subfolder index becomes the label
    for child_dir in os.listdir(path):
        child_path = os.path.join(path, child_dir)
        for dir_image in os.listdir(child_path):
            img = cv2.imread(os.path.join(child_path, dir_image))
            img = cv2.resize(img, (shape[0], shape[1]))  # note: cv2.resize takes (width, height)
            img = img.astype(np.float32) / 255.0  # normalize to [0, 1]; float32 matches the network's dtype
            img_list.append(img)
            label_list.append(dir_counter)
        dir_counter += 1

    length = len(img_list)
    index = list(range(length))
    shuffle(index)  # shuffle the indices
    img_np = np.array(img_list)[index]
    label_np = np.array(label_list)[index]
    train_l = int(0.7 * length)  # 70/30 train/test split point

    train_data = img_np[0:train_l]
    train_label = label_np[0:train_l]
    test_data = img_np[train_l:length]
    test_label = label_np[train_l:length]
    return train_data, train_label, test_data, test_label
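
# For reference: load_image assumes one subfolder per class under `path`, and the
# subfolder's enumeration order determines its integer label. A sketch of the
# expected layout (folder names here are illustrative only):
#   valid/
#     horses/*.jpg  -> label 0
#     humans/*.jpg  -> label 1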
def model(label_num=labels_num):
    # Build the network layer by layer
    networks = Sequential()
    # No inline activation here: batch normalization is applied before the ReLU
    networks.add(layers.Conv2D(96, kernel_size=11, strides=4))
    networks.add(layers.BatchNormalization())
    networks.add(layers.Activation('relu'))
    # Max pooling; output is (26, 26, 96) for a 224x224 input
    networks.add(layers.MaxPool2D(pool_size=(3, 3), strides=2))
    # padding=same; output is (26, 26, 256)
    networks.add(layers.Conv2D(filters=256, kernel_size=(5, 5), padding='same', strides=1, activation='relu'))
    # Max pooling; output is (12, 12, 256)
    networks.add(layers.MaxPool2D(pool_size=(3, 3), strides=2))
    # padding=same; output is (12, 12, 384)
    networks.add(layers.Conv2D(filters=384, kernel_size=(3, 3), padding='same', strides=1, activation='relu'))
    # padding=same; output is (12, 12, 384)
    networks.add(layers.Conv2D(filters=384, kernel_size=(3, 3), padding='same', strides=1, activation='relu'))
    # padding=same; output is (12, 12, 256)
    networks.add(layers.Conv2D(filters=256, kernel_size=(3, 3), padding='same', strides=1, activation='relu'))
    # Max pooling; output is (5, 5, 256)
    networks.add(layers.MaxPool2D(pool_size=(3, 3), strides=2))
    # Flatten before the fully connected layers
    networks.add(layers.Flatten())
    networks.add(layers.Dense(4096, activation='relu'))
    networks.add(layers.Dropout(0.5))  # AlexNet applies dropout 0.5 to the FC layers
    networks.add(layers.Dense(4096, activation='relu'))
    networks.add(layers.Dropout(0.5))
    # The original AlexNet ends with a 1000-way softmax; kept here as a hidden layer
    networks.add(layers.Dense(1000, activation='relu'))

    # Extra classification head for our label_num classes; it outputs logits
    # because the training loss below uses from_logits=True
    networks.add(layers.Dense(label_num))
    return networks
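
# To reproduce the summary shown at the end of this post, the network can be
# built with a fixed input shape and printed (a minimal sketch):
# net = model()
# net.build(input_shape=(None, 224, 224, 3))
# net.summary()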
def train(net, train_data, train_label):
    def get_batch(batch_size, i):
        x = batch_size * i
        train_data_batch = train_data[x:x + batch_size, :]
        train_label_batch = train_label[x:x + batch_size]
        return train_data_batch, train_label_batch

    epoch = 3  # number of epochs
    batch_size = 10  # images per batch
    shape_t = train_data.shape
    net.build(input_shape=(batch_size, shape_t[1], shape_t[2], shape_t[3]))
    num_train_data = shape_t[0]  # total number of training images
    batch_num = int(num_train_data // batch_size)  # number of batches; must be an integer
    optimizer = optimizers.Adam(learning_rate=0.001)  # a decaying schedule could be used; a constant learning rate is kept for simplicity
    loss_object = losses.CategoricalCrossentropy(from_logits=True)  # cross-entropy loss; instantiate once, outside the loop
    for n in range(epoch):
        for i in range(batch_num):
            with tf.GradientTape() as tape:  # operations inside the with-block are recorded for differentiation
                x, y = get_batch(batch_size, i)
                out = net(x)
                y_onehot = tf.one_hot(y, depth=labels_num)  # integer class -> one-hot vector, e.g. 0 -> (1, 0)
                loss = loss_object(y_onehot, out)  # compute the loss for this batch
                print('epoch:%d batch:%d loss:%f' % (n, i, loss.numpy()))
            grad = tape.gradient(loss, net.trainable_variables)  # gradients of the loss w.r.t. all trainable parameters
            optimizer.apply_gradients(zip(grad, net.trainable_variables))  # update the parameters from the gradients
    os.makedirs('model', exist_ok=True)  # make sure the save directory exists
    net.save('model/AlexNet.h5')
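
# Alternative: the same training can be done with Keras's built-in loop (a sketch;
# SparseCategoricalCrossentropy takes integer labels directly, so no one_hot is needed):
def train_with_fit(net, train_data, train_label):
    net.compile(optimizer=optimizers.Adam(learning_rate=0.001),
                loss=losses.SparseCategoricalCrossentropy(from_logits=True),
                metrics=[metrics.SparseCategoricalAccuracy()])
    net.fit(train_data, train_label, batch_size=10, epochs=3)
    os.makedirs('model', exist_ok=True)
    net.save('model/AlexNet.h5')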

def test(test_data, test_label):
    net = load_model('model/AlexNet.h5')
    batch_size = 32
    s_c_a = metrics.SparseCategoricalAccuracy()  # accuracy metric; accumulated batch by batch via update_state
    num_test_batch = int(test_data.shape[0] // batch_size)  # number of test batches
    for num_index in range(num_test_batch):
        start_index, end_index = num_index * batch_size, (num_index + 1) * batch_size  # start/end index of this batch
        y_predict = net.predict(test_data[start_index:end_index])
        s_c_a.update_state(y_true=test_label[start_index:end_index], y_pred=y_predict)
    print('test accuracy:%f' % s_c_a.result())
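
# If the model is compiled with a metric (as in train_with_fit above), the whole
# test set can be scored in one call instead of the manual loop (a sketch):
# net = load_model('model/AlexNet.h5')
# net.compile(loss=losses.SparseCategoricalCrossentropy(from_logits=True),
#             metrics=[metrics.SparseCategoricalAccuracy()])
# _, acc = net.evaluate(test_data, test_label, batch_size=32)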

if __name__ == '__main__':
    path = "E:/project_file/dataset/horse-or-human/valid"
    # AlexNet expects 224x224 inputs (the summary below was produced with this size)
    train_data, train_label, test_data, test_label = load_image(path, (224, 224))
    net = model()
    train(net, train_data, train_label)
    print('------------------------------')
    test(test_data, test_label)

Model summary (note: this output is from a run with labels_num=10 and 224x224 inputs, hence dense_3 has 10 units):

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 54, 54, 96)        34944     
_________________________________________________________________
batch_normalization (BatchNo (None, 54, 54, 96)        384       
_________________________________________________________________
activation (Activation)      (None, 54, 54, 96)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 26, 26, 96)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 26, 26, 256)       614656    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 256)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 12, 12, 384)       885120    
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 12, 12, 384)       1327488   
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 12, 12, 256)       884992    
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 5, 5, 256)         0         
_________________________________________________________________
flatten (Flatten)            (None, 6400)              0         
_________________________________________________________________
dense (Dense)                (None, 4096)              26218496  
_________________________________________________________________
dropout (Dropout)            (None, 4096)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 4096)              16781312  
_________________________________________________________________
dropout_1 (Dropout)          (None, 4096)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 1000)              4097000   
_________________________________________________________________
dense_3 (Dense)              (None, 10)                10010     
=================================================================
Total params: 50,854,402
Trainable params: 50,854,210
Non-trainable params: 192
_________________________________________________________________
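
The parameter counts in the summary can be checked by hand: a Conv2D layer has (kernel_h * kernel_w * in_channels + 1) * filters parameters (the +1 is the per-filter bias), and a Dense layer has (inputs + 1) * units. A quick verification in Python (helper names here are illustrative):

def conv_params(kh, kw, cin, filters):
    # Weights per filter (kh * kw * cin) plus one bias, times the number of filters
    return (kh * kw * cin + 1) * filters

def dense_params(inputs, units):
    # One weight per input plus one bias, per output unit
    return (inputs + 1) * units

print(conv_params(11, 11, 3, 96))    # 34944    (conv2d)
print(conv_params(5, 5, 96, 256))    # 614656   (conv2d_1)
print(conv_params(3, 3, 256, 384))   # 885120   (conv2d_2)
print(conv_params(3, 3, 384, 384))   # 1327488  (conv2d_3)
print(conv_params(3, 3, 384, 256))   # 884992   (conv2d_4)
print(dense_params(6400, 4096))      # 26218496 (dense)
print(4 * 96)                        # 384 (batch_normalization: gamma, beta, moving mean/variance;
                                     #      the 2 * 96 = 192 moving statistics are the non-trainable params)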



Copyright notice: This is an original article by wq3095435422, released under the CC 4.0 BY-SA license. Please include a link to the original source and this notice when reposting.