Training a CNN with TensorFlow for face recognition: complete code (Python 3.6 + OpenCV + TensorFlow 1.9.0 + NumPy 1.16.4)

Step 1: Use OpenCV to read frames from the camera or from a locally stored video, extract grayscale face images, and use them as the training, validation, and test sets.

The images are stored as follows: for example, the training set directory train_images contains subdirectories person0, person1, person2, ..., and each subdirectory holds all the collected images of that person, as illustrated below:

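A minimal sketch of the intended layout (the file names are only an example; validation_images and test_images follow the same pattern):

train_images/
    person0/
        0.jpg
        1.jpg
        ...
    person1/
        0.jpg
        ...
validation_images/
    person0/
    person1/
test_images/
    person0/
    person1/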

Implementation code:

import cv2
import os
import numpy as np

def create_dir(*args):
    for item in args:
        if not os.path.exists(item):
            os.makedirs(item)

def get_padding_size(shape):
    """  square rect 得到短边需填充的像素长度"""
    h,w = shape
    longest = max(w, h)
    result = (np.array([longest]*4) - np.array([h,h,w,w]))//2
    return result.tolist()

def resize_image(img, h=64, w=64):
    """ 填充并裁剪图像, 使图像大小一致"""
    top, bottom, left, right = get_padding_size(img.shape[0:2])  # 填充短边,使与长边一致
    img = cv2.copyMakeBorder(img, top, bottom, left, right,
                             cv2.BORDER_CONSTANT, value=(0,0,0))
    img = cv2.resize(img, (h, w))
    return img

def relight(imgsrc, alpha=1, bias=0):
    """Randomly adjust the image brightness to improve the model's generalization"""
    imgsrc = imgsrc.astype("float")   # astype returns a new array, so the result must be assigned
    imgsrc = imgsrc * alpha + bias
    imgsrc[imgsrc < 0] = 0
    imgsrc[imgsrc > 255] = 255
    imgsrc = imgsrc.astype(np.uint8)
    return imgsrc

def detect_face(n, frame, haar, outdir):
    """Detect faces, save the cropped face images to outdir, and mark the face positions on the frame"""
    # convert to grayscale to speed up detection
    img_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = haar.detectMultiScale(img_gray, 1.3, 5)
    for face_x, face_y, face_w, face_h in faces:
        # crop and save the face image (NumPy indexing is [row, col], i.e. [y, x])
        face = img_gray[face_y:face_y+face_h, face_x:face_x+face_w]
        face = resize_image(face)
        light_face = relight(face, np.random.uniform(0.5,1.5), np.random.randint(-50,50))
        cv2.imwrite(os.path.join(outdir, '{}.jpg'.format(n)), light_face)
        # draw the name and a rectangle around the face
        cv2.putText(frame, "name", (face_x, face_y-20), cv2.FONT_HERSHEY_COMPLEX,
                    1, 255, 2)
        frame = cv2.rectangle(frame, (face_x, face_y), (face_x+face_w, face_y+face_h),
                              (255, 0, 0), 2)
        print("frame", frame.shape)
    return frame

def get_face_from_camera(outdir):
    """ 打开摄像头,调用detect_face函数, 获取人脸图像,并保存到outdir文件夹 """
    create_dir(outdir)
    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print(size)

    haar = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")  # Haar cascade face detector
    # haar.load("haarcascade_frontalface_default.xml")
    for i in range(140):
        print("It`s processing {} image".format(i))
        ret, frame = cap.read()
        frame1 = detect_face(i, frame, haar, outdir)

        cv2.imshow("frame", frame1)
        # time.sleep(1)
        k = cv2.waitKey(20)
        if k & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

def get_face_from_local(local_path, outdir):
    """ 读取本地视频,调用detect_face函数, 获取人脸图像,并保存到outdir文件夹"""
    cap = cv2.VideoCapture(local_path)

    size = (int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)))
    create_dir(outdir)
    haar = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
    i = 0
    while True:
        ret, frame = cap.read()
        if ret:
            # crop and scale the frame if the local video resolution is too large
            frame = frame[300:size[0],:]
            frame = cv2.resize(frame, (400,400))
            frame = detect_face(i, frame, haar, outdir)
            i += 1

            cv2.imshow("frame", frame)
            k = cv2.waitKey(20)
            if k & 0xff == ord('q'):
                break
        else:
            break
    cap.release()
    cv2.destroyAllWindows()

if __name__=="__main__":

    get_face_from_camera("train_images/person0")

Step 2: Train the CNN and save only the model from the epoch with the highest validation accuracy.

CNN architecture: three convolution-pooling blocks, two fully connected layers, and a softmax output.
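For reference, here is how the tensor shapes flow through the network defined below (assuming 64x64 grayscale input and SAME padding), which is where the 8*8*64 flatten size comes from:

# input:                  [batch, 64, 64,  1]
# conv_1 (5x5, 32), pool: [batch, 32, 32, 32]
# conv_2 (3x3, 64), pool: [batch, 16, 16, 64]
# conv_3 (3x3, 64), pool: [batch,  8,  8, 64]
# flatten: 8*8*64 = 4096 -> fc_1 (64 units) -> output (2 classes, softmax applied in the loss)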

import os
import tensorflow as tf
import numpy as np
from PIL import Image   # importing only the top-level PIL package does not reliably expose PIL.Image

def conv2d(input, weight_shape, bias_shape):
    size = weight_shape[0] * weight_shape[1] * weight_shape[2]
    weight_init = tf.random_normal_initializer(stddev=(2.0/size)**0.5)
    bias_init = tf.constant_initializer(value=0)
    w = tf.get_variable(name='w', shape=weight_shape, initializer=weight_init)
    b = tf.get_variable(name='b', shape=bias_shape, initializer=bias_init)
    conv_out = tf.nn.conv2d(input, w, strides=[1,1,1,1], padding="SAME")
    return tf.nn.relu(tf.nn.bias_add(conv_out, b))

def max_pool(input, k=2):
    return tf.nn.max_pool(input, ksize=[1,k,k,1],
                          strides=[1,k,k,1], padding="SAME")

def layer(input, weight_shape, bias_shape):
    weight_init = tf.random_normal_initializer(stddev=(2.0/weight_shape[0])**0.5)
    bias_init = tf.constant_initializer(value=0)
    w = tf.get_variable('w', shape=weight_shape, initializer=weight_init)
    b = tf.get_variable('b', shape=bias_shape, initializer=bias_init)
    output = tf.nn.relu(tf.matmul(input, w) + b)
    return output

def inference(x):
    x = tf.reshape(x, shape=[-1, 64, 64, 1])
    with tf.variable_scope("conv_1"):
        conv_1 = conv2d(x, [5, 5, 1, 32], [32])
        pool_1 = max_pool(conv_1)  # pool_1.shape = [-1, 32, 32, 32]
    with tf.variable_scope("conv_2"):
        conv_2 = conv2d(pool_1, [3,3,32,64], [64])
        pool_2 = max_pool(conv_2)
    with tf.variable_scope("conv_3"):
        conv_3 = conv2d(pool_2, [3,3,64,64], [64])
        pool_3 = max_pool(conv_3)
    with tf.variable_scope("fc_1"):
        input = tf.reshape(pool_3, [-1, 8*8*64])
        fc_1 = layer(input, [8*8*64, 64], [64])
        fc_1_drop = tf.nn.dropout(fc_1, keep_prob=0.5)
    with tf.variable_scope("output"):
        output = layer(fc_1_drop, [64, 2], [2])
    return output

def loss(output, y):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=y)
    cost = tf.reduce_mean(cross_entropy)
    return cost

def train(cost, global_step, learning_rate):
    tf.summary.scalar("cost", cost)
    # train_op = tf.train.GradientDescentOptimizer(learning_rate).\
    #     minimize(cost, global_step=global_step)
    # momentum optimizer
    train_op = tf.train.MomentumOptimizer(learning_rate, 0.9).\
        minimize(cost, global_step=global_step)
    return train_op

def evaluate(output, y):
    correct_predict = tf.equal(tf.argmax(output,axis=1), tf.argmax(y,axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))
    return accuracy

def read_image(images_dir):
    """
    Read local images into NumPy arrays.
    Expected directory layout (one subdirectory per person):
        train_images      --> person0, person1, person2, ...
        validation_images --> person0, person1, person2, ...
    All images must have the same size.
    """
    data = []
    labels = []
    fpaths = []
    for dir in os.listdir(images_dir):  # one subdirectory per person
        for i in os.listdir(os.path.join(images_dir, dir)):
            fpath = os.path.join(os.path.join(images_dir, dir), i)
            image = Image.open(fpath)
            image = np.array(image) / 255.0   # normalize pixel values to [0, 1]
            # print(image.shape)
            label = int(dir[-1])  # the label is the trailing digit of the directory name (person0 -> 0)

            fpaths.append(fpath)
            data.append(image)
            labels.append(label)
    data = np.array(data)
    labels = np.array(labels)
    return fpaths, data, labels


def one_hot(labels, Label_class):
    """独热编码-->为label编码 --> 10000, 01000, 00100, 00010, 00001 """
    one_hot_label = np.array([[int(i == int(labels[j]))
                               for i in range(Label_class)]
                              for j in range(len(labels))])
    return one_hot_label

def local_image_train():
    """ 用本地图像训练人脸识别模型 """
    learning_rate = 0.01
    training_epochs = 50
    display_epoch = 1
    batch_size = 32

    # load the training images and labels
    fpaths, input, labels = read_image("train_images")
    labels = one_hot(labels, 2)
    # shuffle the training data
    n = np.random.permutation(len(input))
    input = input[n, :]
    labels = labels[n, :]
    print("shape of data:{}, shape of labels:{}".format(input.shape, labels.shape))

    # load the validation images and labels
    _, validation_images, validation_labels = read_image("validation_images")
    validation_labels = one_hot(validation_labels, 2)

    with tf.Session() as sess:
        x = tf.placeholder(tf.float32, [None, 64,64])
        y = tf.placeholder(tf.float32, [None,2])

        output = inference(x)
        cost = loss(output, y)
        global_step = tf.Variable(0, trainable=False, name="global_step")
        train_op = train(cost, global_step, learning_rate)
        eval_op = evaluate(output, y)
        # write the graph and cost summaries for TensorBoard
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter("summary_graph/",
                                               graph=sess.graph)
        # keep only the checkpoint with the highest validation accuracy
        saver = tf.train.Saver(max_to_keep=1)

        init = tf.global_variables_initializer()
        sess.run(init)
        avg_cost = 0
        max_acc = 0

        for step in range(training_epochs):
            # train on mini-batches for one epoch
            total_batch = input.shape[0]//batch_size
            for i in range(total_batch):
                input_batch = input[(i*batch_size):((i+1)*batch_size), :]
                # print("input_patch",input_batch.shape)
                label_batch = labels[(i*batch_size):((i+1)*batch_size), :]
                feed_dict = {x: input_batch, y: label_batch}
                sess.run(train_op, feed_dict=feed_dict)
                mini_cost = sess.run(cost, feed_dict=feed_dict)
                avg_cost += mini_cost/total_batch

            if step % display_epoch == 0:
                # evaluate accuracy on the validation set
                accuracy = sess.run(eval_op, feed_dict={x:validation_images, y:validation_labels})
                print("validation error is {}".format(1-accuracy))

                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, sess.run(global_step))
                # save the model only when the validation accuracy improves
                if accuracy > max_acc:
                    max_acc = accuracy
                    saver.save(sess, "cnn_model/network.ckpt", global_step=global_step)

        print("Optimization Finished!")
    sess.close()

if __name__ == "__main__":
    # train the face recognition model on local images
    local_image_train()
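The training loop above writes the cost summary and the graph to summary_graph/, so the learning curve can be inspected with TensorBoard (installed together with TensorFlow):

tensorboard --logdir summary_graph/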

Step 3: Restore the trained model and measure its accuracy on the test set, or open the camera for live face recognition: when a face from the image library appears on screen, it is labeled with the person's name.

import tensorflow as tf
import cv2
from cnn_train import read_image, one_hot, inference      # the Step 2 script
from opencv_collecting_face import resize_image           # the Step 1 script
import numpy as np

def cnn_test():
    _, test_images, test_labels = read_image("test_images")
    test_labels = one_hot(test_labels, 2)

    x = tf.placeholder("float", shape=[None, 64, 64])
    output = inference(x)

    with tf.Session() as sess:
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint("cnn_model/"))
        predict = sess.run(output, feed_dict={x:test_images})
        correct_predict = tf.equal(tf.argmax(predict, 1), tf.argmax(test_labels, 1))
        print(sess.run(correct_predict))
        accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))
        print(sess.run(accuracy))
        sess.close()


def face_recognition():
    """ 打开摄像头,调用训练好的cnn网络进行人脸识别,并标出名字"""
    cap = cv2.VideoCapture(0)
    haar = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
    x = tf.placeholder(tf.float32, shape=[64, 64])

    with tf.Session() as sess:
        output = inference(x)
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint("cnn_model/"))

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            gray_image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            faces = haar.detectMultiScale(gray_image, 1.3, 5)
            for face_x, face_y, face_w, face_h in faces:
                # crop the face region (NumPy indexing is [y, x]) and resize it to the CNN input size
                face = gray_image[face_y: face_y+face_h, face_x: face_x+face_w]
                face = resize_image(face)
                # normalize to [0, 1], matching the training data
                face = face / 255.0
                predict = sess.run(output, feed_dict={x: face})  # the prediction is an array of class scores
                # use np.argmax instead of building new tf.argmax ops on every frame
                label = int(np.argmax(predict, axis=1)[0])
                label_name = ['person0', 'person1', 'person2']  # one entry per class, in training-label order

                # convert the predicted label into a name and draw it on the frame
                cv2.putText(frame, label_name[label], (face_x, face_y-20),
                            cv2.FONT_HERSHEY_COMPLEX, 1, 255, 2)
                frame = cv2.rectangle(frame, (face_x, face_y),
                                      (face_x+face_w, face_y+face_h), (255, 0, 0), 2)
            cv2.imshow("face recognition", frame)

            if cv2.waitKey(20) & 0xff == ord('q'):
                break
        cap.release()
        cv2.destroyAllWindows()

        sess.close()

if __name__=="__main__":
    # cnn_test()
    face_recognition()
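Note that the network above is wired for two people: the output layer is [64, 2] and the labels are encoded with one_hot(labels, 2), while label_name lists three people. These must be kept consistent; a sketch of the lines that would change for three people (person0, person1, person2):

    output = layer(fc_1_drop, [64, 3], [3])             # in inference(), scope "output"
    y = tf.placeholder(tf.float32, [None, 3])           # the label placeholder in local_image_train()
    labels = one_hot(labels, 3)                         # in local_image_train()
    label_name = ['person0', 'person1', 'person2']      # in face_recognition()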

If you want to train a handwritten-digit recognizer on the MNIST dataset instead, make the following changes to the Step 2 program:

# Replace the inference function in the Step 2 program with the code below and add the mnist_test() function; everything else stays the same
def inference(x):
    """用mnist数据集训练的cnn网络"""
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    with tf.variable_scope("conv_1"):
        conv_1 = conv2d(x, [5, 5, 1, 32], [32])
        pool_1 = max_pool(conv_1)  # pool_1.shape = [-1, 14, 14, 32]
    with tf.variable_scope("conv_2"):
        conv_2 = conv2d(pool_1, [3,3,32,64], [64])
        pool_2 = max_pool(conv_2)
    with tf.variable_scope("conv_3"):
        conv_3 = conv2d(pool_2, [3,3,64,64], [64])
        pool_3 = max_pool(conv_3)
    with tf.variable_scope("fc_1"):
        input = tf.reshape(pool_3, [-1, 4*4*64])
        fc_1 = layer(input, [4*4*64, 64], [64])
        fc_1_drop = tf.nn.dropout(fc_1, keep_prob=0.5)
    with tf.variable_scope("output"):
        output = layer(fc_1_drop, [64, 10], [10])
    return output
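# Shape walk-through for the MNIST variant (assuming SAME padding, which rounds the spatial size up
# at each 2x2 pooling), explaining the 4*4*64 flatten size used in fc_1 above:
#   input:            [batch, 28, 28,  1]
#   conv_1 + pool ->  [batch, 14, 14, 32]
#   conv_2 + pool ->  [batch,  7,  7, 64]
#   conv_3 + pool ->  [batch,  4,  4, 64]   (ceil(7/2) = 4)
#   flatten -> 4*4*64 = 1024 -> fc_1 (64 units) -> output (10 classes)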

def mnist_test():
    """用mnist数据集训练手写体识别模型"""
    with tf.Graph().as_default():
        learning_rate = 0.01
        training_epochs = 10
        display_epoch = 1
        batch_size = 256

        # use the MNIST dataset to check that the model works
        from tensorflow.examples.tutorials.mnist import input_data
        mnist = input_data.read_data_sets("MNIST_data/data/", one_hot=True)

        x = tf.placeholder('float', shape=[None,28*28])
        y = tf.placeholder('float', shape=[None,10])

        output = inference(x)
        cost = loss(output, y)
        global_step = tf.Variable(0, trainable=False, name="global_step")
        train_op = train(cost, global_step=global_step, learning_rate=learning_rate)
        eval_op = evaluate(output, y)

        sess = tf.Session()
        sess.run(tf.global_variables_initializer())

        avg_cost = 0
        for step in range(training_epochs):
            total_batch = int(mnist.train.num_examples/batch_size)
            for i in range(5):  # training is slow, so only a few mini-batches are used per epoch
                mbatch_x, mbatch_y = mnist.train.next_batch(batch_size)
                # print("mbatch_x", mbatch_x.shape)
                feed_dict = {x:mbatch_x, y:mbatch_y}
                sess.run(train_op, feed_dict=feed_dict)
                mini_cost = sess.run(cost, feed_dict=feed_dict)
                avg_cost += mini_cost/total_batch
            print(avg_cost)
            if step % display_epoch == 0:
                val_feed_dict = {x: mnist.validation.images, y: mnist.validation.labels}
                accuracy = sess.run(eval_op, feed_dict=val_feed_dict)
                print("validation error:", 1-accuracy)
        print("optimization finished!")

if __name__ == "__main__":
    # train the handwritten-digit recognition model on MNIST
    mnist_test()

Results: try it yourself!


