OpenCV in Practice: Parking Spot Detection


Preface

While learning OpenCV on Bilibili recently, I came across a fairly interesting project: detecting parking spots in video with OpenCV + TensorFlow. It involves a lot of image processing as well as model training with TensorFlow, but since I lack the deep learning background, I only skimmed and ran the model-training part of the example. What follows is my record of the image processing side of the project, kept for future reference.




Approach to Parking Spot Detection


Processing video is essentially the same as processing still images, since a video is just a sequence of frames; the first task is to grab a few frames from the video to use for the image processing. The pipeline:

Step 1: manually define a region to filter out the background, keeping only the parking lot.

Step 2: run edge detection to extract the rough outline of the parking lot.

Step 3: run a Hough transform to detect the straight lines in the parking lot.

Step 4: filter the detected lines, then partition the line-marked image into individual parking spots, giving each spot a number.

Step 5: crop out an image of each parking spot to use as training data for the model. (A driver sketch tying these steps together follows.)
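
To make the flow concrete, here is a minimal driver sketch of my own chaining the steps together. It assumes the helper functions defined in the code sections below, plus a hypothetical frame file 'frame.jpg' and Canny thresholds (50, 200) that I chose myself:

import cv2

image = cv2.imread('frame.jpg')                     # a frame grabbed from the video
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
edges = detect_edges(gray, 50, 200)                 # edge detection (Step 2)
spot_img, vertices = select_region(edges)           # manually chosen region (Step 1)
roi = filter_region(edges, vertices)                # keep only the parking lot
lines = hough_lines(roi)                            # line detection (Step 3)
rect_img, rects = detected_blocks(image, lines)     # cluster lines into columns (Step 4)
final_img, spot_dict = draw_parking(image, rects)   # split into numbered spots
save_images_for_cnn_data(image, spot_dict)          # crop spots for training (Step 5)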



Image Processing Code

Original image:

First run edge detection, then manually define a region and discard everything outside it. The idea: place a few points around the image (six points here), connect them into a polygon to form a mask, fill the inside of the mask with 255 and leave the outside 0, then bitwise-AND the mask with the image to remove everything outside the region.

import cv2
import numpy as np

# Small display helper used throughout (my own stand-in; the original post does not show it)
def show(name, img):
    cv2.imshow(name, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Edge detection
def detect_edges(image, low_threshold, high_threshold):
    return cv2.Canny(image, low_threshold, high_threshold)

# Manually select a region
def select_region(image):
    # polygon vertices, placed as fractions of the image size
    rows, cols = image.shape[:2]
    spot1 = [cols * 0.05, rows * 0.92]
    spot2 = [cols * 0.05, rows * 0.70]
    spot3 = [cols * 0.30, rows * 0.55]
    spot4 = [cols * 0.60, rows * 0.12]
    spot5 = [cols * 0.90, rows * 0.12]
    spot6 = [cols * 0.90, rows * 0.92]
    vertices = np.array([[spot1, spot2, spot3, spot4, spot5, spot6]], dtype=np.int32)
    print(vertices)
    spot_img = image.copy()
    # convert the single-channel edge image to BGR so the dots can be drawn in color
    # (the original used COLOR_RGB2BGR, which fails on a grayscale input)
    spot_img = cv2.cvtColor(spot_img, cv2.COLOR_GRAY2BGR)
    for spot in vertices[0]:
        # draw a dot at each vertex
        cv2.circle(spot_img, (spot[0], spot[1]), 10, (200, 100, 200), -1)

    # show('spot_img', spot_img)
    return spot_img, vertices

# Remove everything outside the region
def filter_region(image, vertices):
    mask = np.zeros_like(image)
    if len(mask.shape) == 2:
        cv2.fillPoly(mask, vertices, 255)  # fill the polygon
        show('mask', mask)
    return cv2.bitwise_and(image, mask)
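
To see the mask logic in isolation, here is a tiny toy example of my own (not from the project): fillPoly writes 255 inside the polygon, and bitwise_and keeps only those pixels.

import cv2
import numpy as np

img = np.full((6, 6), 7, dtype=np.uint8)    # a 6x6 "image" filled with 7s
mask = np.zeros_like(img)
square = np.array([[[1, 1], [4, 1], [4, 4], [1, 4]]], dtype=np.int32)
cv2.fillPoly(mask, square, 255)             # inside the polygon -> 255, outside stays 0
roi = cv2.bitwise_and(img, mask)            # a pixel survives only where the mask is 255
print(roi)                                  # 7s inside the square, 0s everywhere else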

Result:

Next, run the Hough transform to detect the straight lines in the parking lot.

# cv2.HoughLinesP is the probabilistic Hough line transform
def hough_lines(image):
    # the input must be the result of edge detection
    # minLineLength: segments shorter than this are discarded
    # maxLineGap: two segments closer than this are merged into a single line
    # rho: distance resolution; theta: angle resolution; threshold: minimum votes for a line
    return cv2.HoughLinesP(image, rho=0.1, theta=np.pi/10, threshold=15, minLineLength=8, maxLineGap=1)
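
The raw output is easiest to judge by eye. A small sketch of my own that draws every detected segment on a copy of the frame:

def draw_lines(image, lines, color=(0, 0, 255), thickness=2):
    # draw every segment returned by hough_lines() for visual inspection
    img = image.copy()
    if lines is not None:
        for line in lines:
            for x1, y1, x2, y2 in line:
                cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), color, thickness)
    return img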

Next, filter the detected lines, then partition the image into individual parking spots and number each one. This takes roughly five steps: first, filter the detected lines, keeping only those that look like spot markings, i.e. nearly horizontal (vertical difference of at most 1 pixel) with a length in a plausible range (20 to 57 pixels in the code); second, sort the surviving lines by (x1, y1); third, cluster the lines by column (the image has 12 columns of spots, so 12 clusters); fourth, compute the coordinates of each column; finally, draw out the columns and split each into individual, numbered parking spots.

import operator

# Draw a box around each column of parking spots
def detected_blocks(image, lines):
    # Step 1: filter the detected lines
    img = image.copy()
    cleaned = []
    for line in lines:
        for x1, y1, x2, y2 in line:
            # keep near-horizontal segments of plausible spot-marking length
            if abs(y2 - y1) <= 1 and 20 <= abs(x2 - x1) <= 57:
                cleaned.append((x1, y1, x2, y2))
    # Step 2: sort the lines by (x1, y1)
    list1 = sorted(cleaned, key=operator.itemgetter(0, 1))
    # Step 3: find the columns, one per lane of parked cars
    # lines belonging to each column
    clusters = {}
    # column index
    dIndex = 0
    # maximum x-distance for two lines to belong to the same column
    clus_dist = 20

    for i in range(len(list1) - 1):
        distance = abs(list1[i + 1][0] - list1[i][0])
        if distance <= clus_dist:
            if dIndex not in clusters:
                clusters[dIndex] = []
            clusters[dIndex].append(list1[i])
            clusters[dIndex].append(list1[i + 1])
        else:
            dIndex += 1

    # Step 4: compute one bounding rectangle per column
    # rectangle coordinates, keyed by column
    rects = {}
    i = 0
    for key in clusters:
        all_list = clusters[key]
        cleaned = list(set(all_list))
        # only keep columns with at least 10 distinct lines
        if len(cleaned) >= 10:
            # y coordinates of the first and last rows
            cleaned = sorted(cleaned, key=lambda tup: tup[1])
            avg_y1 = cleaned[0][1]
            avg_y2 = cleaned[-1][1]
            avg_x1 = 0
            avg_x2 = 0
            # average the x1 and x2 coordinates
            for tup in cleaned:
                avg_x1 += tup[0]
                avg_x2 += tup[2]
            avg_x1 = avg_x1 / len(cleaned)
            avg_x2 = avg_x2 / len(cleaned)
            rects[i] = (avg_x1, avg_y1, avg_x2, avg_y2)
            i += 1

    print("Num Parking Lanes: ", len(rects))

    # Step 5: draw the column rectangles
    # small margin for fine-tuning
    buff = 7
    for key in rects:
        tup_topLeft = (int(rects[key][0] - buff), int(rects[key][1]))
        tup_botRight = (int(rects[key][2] + buff), int(rects[key][3]))
        cv2.rectangle(img, tup_topLeft, tup_botRight, (255, 0, 0), 3)
    # show('img', img)
    return img, rects


# Draw the individual parking spots
def draw_parking(image, rects, color=[0, 0, 255], thickness=2, save=True):
    img = image.copy()
    # height of one parking spot in pixels
    gap = 17
    # dictionary mapping each spot's coordinates to its number
    spot_dict = {}
    tot_spots = 0
    # per-column fine-tuning offsets for the rectangles
    adj_x1 = {0: -8, 1: -15, 2: -15, 3: -12, 4: -12, 5: -15, 6: -15, 7: -15, 8: -15, 9: -10, 10: -10, 11: 0}
    adj_y1 = {0: -15, 1: -10, 2: 0, 3: -11, 4: 22, 5: 5, 6: -15, 7: -15, 8: -20, 9: 0, 10: 40, 11: -10}

    adj_x2 = {0: 5, 1: 15, 2: 15, 3: 15, 4: 15, 5: 15, 6: 15, 7: 15, 8: 13, 9: 10, 10: 10, 11: 5}
    adj_y2 = {0: -10, 1: 20, 2: 15, 3: 10, 4: 5, 5: 15, 6: -5, 7: -20, 8: 15, 9: 15, 10: 0, 11: 30}
    for key in rects:
        tup = rects[key]
        x1 = int(tup[0] + adj_x1[key])  # fine-tune the coordinates
        y1 = int(tup[1] + adj_y1[key])
        x2 = int(tup[2] + adj_x2[key])
        y2 = int(tup[3] + adj_y2[key])
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), thickness)
        # show('res', img)
        num_splits = int(abs(y2 - y1) // gap)
        # split the column into spots with horizontal lines
        for i in range(0, num_splits + 1):
            y = int(y1 + i * gap)
            cv2.line(img, (x1, y), (x2, y), color, thickness)
        # show('res', img)
        # inner columns hold two rows of spots back to back, so split them down the middle
        if key > 0 and key < len(rects) - 1:
            # vertical line
            x = int((x1 + x2) / 2)
            cv2.line(img, (x, y1), (x, y2), color, thickness)
        # show('res', img)
        # count the spots; inner columns hold two rows, so they count double
        if key == 0 or key == (len(rects) - 1):
            tot_spots += num_splits + 1
        else:
            tot_spots += 2 * (num_splits + 1)
        # record each spot in the dictionary
        if key == 0 or key == (len(rects) - 1):
            for i in range(0, num_splits + 1):
                cur_len = len(spot_dict)
                y = int(y1 + i * gap)
                spot_dict[(x1, y, x2, y + gap)] = cur_len + 1
        else:
            for i in range(0, num_splits + 1):
                cur_len = len(spot_dict)
                y = int(y1 + i * gap)
                x = int((x1 + x2) / 2)
                spot_dict[(x1, y, x, y + gap)] = cur_len + 1
                spot_dict[(x, y, x2, y + gap)] = cur_len + 2
    print("total parking spaces: ", tot_spots, len(spot_dict))
    if save:
        filename = 'with_parking.jpg'
        cv2.imwrite(filename, img)
    return img, spot_dict

Finally, crop out an image of each parking spot to use as training data. The resulting crops still need to be sorted by hand into two classes: empty spots and occupied ones.

import os

# Crop each spot out individually and save it
def save_images_for_cnn_data(image, spot_dict, folder_name='cnn_data'):
    for spot in spot_dict.keys():
        (x1, y1, x2, y2) = spot
        (x1, y1, x2, y2) = (int(x1), int(y1), int(x2), int(y2))
        # crop
        spot_img = image[y1:y2, x1:x2]
        # upscale the crop
        spot_img = cv2.resize(spot_img, (0, 0), fx=2.0, fy=2.0)
        spot_id = spot_dict[spot]
        # file name
        filename = 'spot' + str(spot_id) + '.jpg'
        print(spot_img.shape, filename, (x1, x2, y1, y2))
        # save the image
        cv2.imwrite(os.path.join(folder_name, filename), spot_img)
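
After hand-sorting, the training script in the next section expects one sub-folder per class under the train and test directories. The class names 'empty' and 'occupied' are my assumption (the prediction code below checks for the label 'empty'); what matters is that the folder names are consistent between train/ and test/:

train_data/
    train/
        empty/      spot1.jpg, spot5.jpg, ...
        occupied/   spot2.jpg, spot3.jpg, ...
    test/
        empty/      ...
        occupied/   ...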




Model Training



Installing Keras and TensorFlow

Keras releases are tied to specific TensorFlow versions. I had originally just pip-installed the latest Keras, which made it impossible to train with TensorFlow. Here I installed Keras 2.3.1 and TensorFlow 2.2.0.

Install Keras:

pip install keras==2.3.1 -i https://pypi.tuna.tsinghua.edu.cn/simple

Install TensorFlow:

pip install tensorflow==2.2.0 -i https://pypi.tuna.tsinghua.edu.cn/simple
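
To confirm the pinned versions took effect, a quick check:

python -c "import tensorflow as tf, keras; print(tf.__version__, keras.__version__)"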



Training

I used the project's training code directly; its comments are already quite detailed, and you only need to point the dataset paths at your own data. During training, the pretrained weights are downloaded from GitHub. I initially thought a bug in the code was preventing the download, but it turned out the weight download itself was failing; if it fails, just retry a few times. Once training finishes, a weights.h5 model file is produced.

import numpy
import os
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import backend as k
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.initializers import TruncatedNormal
from keras.layers.core import Activation


files_train = 0
files_validation = 0

cwd = os.getcwd()
# training set
folder = 'train_data/train'
for sub_folder in os.listdir(folder):
    path, dirs, files = next(os.walk(os.path.join(folder,sub_folder)))
    files_train += len(files)

# test set (used as validation)
folder = 'train_data/test'
for sub_folder in os.listdir(folder):
    path, dirs, files = next(os.walk(os.path.join(folder,sub_folder)))
    files_validation += len(files)

print(files_train,files_validation)

# 48x48 is the minimum input image size
img_width, img_height = 48, 48
train_data_dir = "train_data/train"
validation_data_dir = "train_data/test"
nb_train_samples = files_train
nb_validation_samples = files_validation
batch_size = 32
epochs = 15
# number of classes
num_classes = 2



'''
include_top: whether to keep the top 3 fully connected layers
weights: None means random initialization (no pretrained weights); 'imagenet' loads ImageNet-pretrained weights
input_tensor: an optional Keras tensor to use as the model's image input
input_shape: optional, only valid when include_top=False; a tuple of length 3 giving the input image shape, with width and height greater than 48, e.g. (200, 200, 3)
'''
model = applications.VGG16(weights='imagenet', include_top=False, input_shape = (img_width, img_height, 3))

# freeze the first 10 layers to speed up training
for layer in model.layers[:10]:
    layer.trainable = False
'''
Dense layer:
    units: number of neurons, i.e. the output dimensionality.
    activation: activation function; if unspecified, none is applied (linear activation: a(x) = x).
    use_bias: boolean, whether the layer uses a bias vector.
    kernel_initializer: initializer for the kernel weight matrix
    bias_initializer: initializer for the bias vector
    kernel_regularizer: regularizer applied to the kernel weight matrix
    bias_regularizer: regularizer applied to the bias vector
    activity_regularizer: regularizer applied to the layer's output (its "activation")
    kernel_constraint: constraint applied to the kernel weight matrix
    bias_constraint: constraint applied to the bias vector
'''
x = model.output
x = Flatten()(x)
predictions = Dense(num_classes, activation="softmax")(x)


# Keras 2 uses the inputs/outputs keyword names
model_final = Model(inputs=model.input, outputs=predictions)

"""
 model.compile(optimizer = 优化器,

                        loss = 损失函数,

                        metrics = ["准确率”])
"""
model_final.compile(loss = "categorical_crossentropy",
                    optimizer = optimizers.SGD(lr=0.0001, momentum=0.9),
                    metrics=["accuracy"])

"""
数据增强
ImageDataGenerator(
    rescale=所有数据集将乘以该数值,
    rotation_range=随即旋转角度数范围,
    width_shift_range=随即宽度偏移量,
    height_shift_range=随即高度偏移量,     horizontal_flip=是否随机水平翻转,
    zoom_range=随机缩放的范围 -> [1-n,1+n])
    该函数可以增强图片数据,需要fit函数来对指定的数据进行增强,这里要求是四维数据(图片张数,图片长度,图片宽度,灰度),先reshape为四维数据然后调用fit函数e
"""
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    horizontal_flip=True,
    fill_mode="nearest",
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    rotation_range=5)
# the same augmentation is applied to the validation data
test_datagen = ImageDataGenerator(
    rescale=1. / 255,
    horizontal_flip=True,
    fill_mode="nearest",
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    rotation_range=5)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode="categorical")

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    class_mode="categorical")

"""
回调函数:
    filename:字符串,保存模型的路径,filepath可以是格式化的字符串,里面的占位符将会被epoch值和传入on_epoch_end的logs关键字所填入。
    例如:
    filepath = “weights_{epoch:03d}-{val_loss:.4f}.h5”
    则会生成对应epoch和验证集loss的多个文件。
    monitor:需要监视的值,通常为:val_acc 或 val_loss 或 acc 或 loss
    verbose:信息展示模式,0或1。为1表示输出epoch模型保存信息,默认为0表示不输出该信息,信息形如:
    Epoch 00001: val_acc improved from -inf to 0.49240, saving model to /xxx/checkpoint/model_001-0.3902.h5
    save_best_only:当设置为True时,将只保存在验证集上性能最好的模型
    mode:‘auto’,‘min’,‘max’之一,在save_best_only=True时决定性能最佳模型的评判准则,例如,当监测值为val_acc时,模式应为max,当检测值为val_loss时,模式应为min。在auto模式下,评价准则由被监测值的名字自动推断。
    save_weights_only:若设置为True,则只保存模型权重,否则将保存整个模型(包括模型结构,配置信息等)
    period:CheckPoint之间的间隔的epoch数

"""
checkpoint = ModelCheckpoint("weights.h5", monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)
early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=10, verbose=1, mode='auto')



# train using the generators; note that Keras 2 counts steps (batches) per epoch,
# not samples, so the sample counts are divided by the batch size
history_object = model_final.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size,
    callbacks=[checkpoint, early])
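
fit_generator returns a History object, so the accuracy curves can be inspected afterwards. An optional sketch of my own, assuming matplotlib is installed (on Keras versions before 2.3 the keys are 'acc'/'val_acc' instead):

import matplotlib.pyplot as plt

# plot training vs. validation accuracy recorded during training
plt.plot(history_object.history['accuracy'], label='train acc')
plt.plot(history_object.history['val_accuracy'], label='val acc')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.savefig('training_curves.png')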



Model Prediction

Load the trained model and predict the state of each spot. I only ran prediction on still images, and the results were mediocre. After processing, the image contains 500-odd parking spots, so every image takes 500-odd predictions, which is slow. And since video is just a sequence of frames, at 60 frames per second predicting one second of video means roughly 500 × 60 = 30,000 predictions. That computation adds up fast, and my wheezing old laptop is not up to it, so I didn't run prediction on the video.

import numpy as np

# Predict with the trained model
def make_prediction(image, model, class_dictionary):
    # preprocessing: scale pixel values to [0, 1]
    img = image / 255.

    # convert to a 4D tensor (a batch of one)
    image = np.expand_dims(img, axis=0)

    # predict with the trained model
    class_predicted = model.predict(image)
    inID = np.argmax(class_predicted[0])
    label = class_dictionary[inID]
    return label

# Test on an image
def predict_image(image, spot_dict, model, class_dictionary, color=[255, 0, 0], alpha=0.5):
    new_image = image.copy()
    overlay = image.copy()
    cnt_empty = 0
    all_spots = 0
    for spot in spot_dict.keys():
        all_spots += 1
        (x1, y1, x2, y2) = spot
        (x1, y1, x2, y2) = (int(x1), int(y1), int(x2), int(y2))
        spot_img = image[y1:y2, x1:x2]
        spot_img = cv2.resize(spot_img, (48, 48))
        label = make_prediction(spot_img, model, class_dictionary)
        if label == 'empty':
            cv2.rectangle(overlay, (int(x1), int(y1)), (int(x2), int(y2)), color, -1)
            cnt_empty += 1
    cv2.addWeighted(overlay, alpha, new_image, 1 - alpha, 0, new_image)

    cv2.putText(new_image, "Available: %d spots" % cnt_empty, (30, 95),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7, (255, 255, 255), 2)

    cv2.putText(new_image, "Total: %d spots" % all_spots, (30, 125),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7, (255, 255, 255), 2)

    # uncomment to save the result
    # if save:
    #     filename = 'with_marking.jpg'
    #     cv2.imwrite(filename, new_image)
    show('new_image', new_image)
    return new_image
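
A minimal sketch of how I'd wire this up. The class_dictionary mapping is my assumption: flow_from_directory assigns class indices alphabetically, so with folders named 'empty' and 'occupied', index 0 is 'empty'. spot_dict is the dictionary returned by draw_parking above, and 'frame.jpg' is a hypothetical test frame.

from keras.models import load_model

model = load_model('weights.h5')                  # the file produced by the training script
class_dictionary = {0: 'empty', 1: 'occupied'}    # assumed class folder names, alphabetical order
image = cv2.imread('frame.jpg')
marked = predict_image(image, spot_dict, model, class_dictionary)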

Final result:

Full code:

Link: https://pan.baidu.com/s/1KrLEKTkMY5iFncaVFqBwEA

Extraction code: 2333



Copyright notice: this is an original article by Thousand_drive, licensed under CC 4.0 BY-SA; please include a link to the original source and this notice when reposting.