Open3d对kitti数据集可视化

  • Post author:
  • Post category:其他

1.数据集工程结构

数据集和kitti类解析文件结构如下:

在这里插入图片描述
数据集的工程结构如下:
在这里插入图片描述

kitti文件夹的部分直接将kitti数据集对应文件放入其中,trainning文件夹可以将我们训练的数据集放入其中。
该结构很重要因为他涉及到下面对kiiti数据集进行解析,相对路径的设置。

2.kitti数据集解析

kitti数据集的中含有bin,rgb,label,calib这四类文件。其中中calib和label为txt文件。
我们需要逐行读入,然后存入到数组中,然后按照数组的索引获取具体的值读入到后续程序中。

Kitti_Dataset类

下面是kitti_Dataset.py文件的代码:
import os
import numpy as np
from data.calib import Calib
from data.object3d import Object3d
import cv2

class Kitti_Dataset:
    def __init__(self, dir_path, split="training"):
        super(Kitti_Dataset, self).__init__()
        self.dir_path = os.path.join(dir_path, split)
        # calib矫正参数文件夹地址
        self.calib = os.path.join(self.dir_path, "calib")
        # RGB图像的文件夹地址
        self.images = os.path.join(self.dir_path, "img")
        # 点云图像文件夹地址
        self.pcs = os.path.join(self.dir_path, "velodyne")
        # 标签文件夹的地址
        self.labels = os.path.join(self.dir_path, "label")

    # 得到当前数据集的大小
    def __len__(self):
        file = []
        for _, _, file in os.walk(self.images):
            pass

        # 返回rgb图片的数量
        return len(file)

    # 得到矫正参数的信息
    def get_calib(self, index):
        # 得到矫正参数文件
        calib_path = os.path.join(self.calib, "{:06d}.txt".format(index))
        with open(calib_path) as f:
            lines = f.readlines()

        lines = list(filter(lambda x: len(x) and x != '\n', lines))
        dict_calib = {}
        for line in lines:
            key, value = line.split(":")
            dict_calib[key] = np.array([float(x) for x in value.split()])
        return Calib(dict_calib)

    def get_rgb(self, index):
        # 首先得到图片的地址
        img_path = os.path.join(self.images, "{:06d}.png".format(index))
        return cv2.imread(img_path)

    def get_pcs(self, index):
        pcs_path = os.path.join(self.pcs, "{:06d}.bin".format(index))
        # 点云的四个数据(x, y, z, r)
        aaa = np.fromfile(pcs_path, dtype=np.float32, count=-1).reshape([-1, 4])
        return aaa[:, :3]

    def get_labels(self, index):
        labels_path = os.path.join(self.labels, "{:06d}.txt".format(index))
        with open(labels_path) as f:
            lines = f.readlines()
        lines = list(filter(lambda x: len(x) > 0 and x != '\n', lines))

        return [Object3d(x) for x in lines]

Object3d类

下面是object3d.py文件中的代码:
import numpy as np
class Object3d:
    def __init__(self, content):
        super(Object3d, self).__init__()
        # content 就是一个字符串,根据空格分隔开来
        lines = content.split()

        # 去掉空字符
        lines = list(filter(lambda x: len(x), lines))

        self.name, self.truncated, self.occluded, self.alpha = lines[0], float(lines[1]), float(lines[2]), float(lines[3])

        self.bbox = [lines[4], lines[5], lines[6], lines[7]]
        self.bbox = np.array([float(x) for x in self.bbox])
        self.dimensions = [lines[8], lines[9], lines[10]]
        self.dimensions = np.array([float(x) for x in self.dimensions])
        self.location = [lines[11], lines[12], lines[13]]
        self.location = np.array([float(x) for x in self.location])
        self.rotation_y = float(lines[14])
        #这一行是模型训练后的label通常最后一行是阈值,可以同个这个过滤掉概率低的object
        #如果只要显示kitti本身则不需要这一行
        #self.ioc = float(lines[15])

Calib类

下面是calib.py文件中的代码:
class Calib:
    def __init__(self, dict_calib):
        super(Calib, self).__init__()
        self.P0 = dict_calib['P0'].reshape(3, 4)
        self.P1 = dict_calib['P1'].reshape(3, 4)
        self.P2 = dict_calib['P2'].reshape(3, 4)
        self.P3 = dict_calib['P3'].reshape(3, 4)
        self.R0_rect = dict_calib['R0_rect'].reshape(3, 3)
        self.P0 = dict_calib['P0'].reshape(3, 4)
        self.Tr_velo_to_cam = dict_calib['Tr_velo_to_cam'].reshape(3, 4)
        self.Tr_imu_to_velo = dict_calib['Tr_imu_to_velo'].reshape(3, 4)



3.单帧显示kiiti的点云bin demo

注:demo的脚本都写在和data同级目录下*

下面是点云俯视图的显示结果展示:

鸟瞰图视角

下面是对点云的主视图进行展示:

在这里插入图片描述

下面是点云显示的代码展示:
该代码不用直接运行,需要通过terminal中的命令行进行运行。运行的命令是:
python one_bin_show.py --index 10
其中 one_bin_show.py是代码所在文件, 10为想要显示的点云的索引。

one_bin_show.py

import os

import cv2
import numpy as np
import time
import open3d as o3d
from data.kitti_Dataset import Kitti_Dataset


from pathlib import Path
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--index', type=str, default=None, help='index for the label data', required=True)
args = parser.parse_args()

# 根据偏航角计算旋转矩阵(逆时针旋转)
def rot_y(rotation_y):
    cos = np.cos(rotation_y)
    sin = np.sin(rotation_y)
    R = np.array([[cos, 0, sin], [0, 1, 0], [-sin, 0, cos]])
    return R



def draw_3dframeworks(vis,points):

    position = points
    points_box = np.transpose(position)

    lines_box = np.array([[0, 1], [1, 2], [0, 3], [2, 3], [4, 5], [4, 7], [5, 6], [6, 7],
                          [0, 4], [1, 5], [2, 6], [3, 7], [0, 5], [1, 4]])
    colors = np.array([[1., 0., 0.] for j in range(len(lines_box))])
    line_set = o3d.geometry.LineSet()

    line_set.points = o3d.utility.Vector3dVector(points_box)
    line_set.lines = o3d.utility.Vector2iVector(lines_box)
    line_set.colors = o3d.utility.Vector3dVector(colors)


    render_option.line_width = 5.0
    vis.update_geometry(line_set)
    render_option.background_color = np.asarray([1, 1, 1])
    # vis.get_render_option().load_from_json('renderoption_1.json')
    render_option.point_size = 4
    #param = o3d.io.read_pinhole_camera_parameters('BV.json')



    print(render_option.line_width)
    ctr = vis.get_view_control()

    vis.add_geometry(line_set)
    #ctr.convert_from_pinhole_camera_parameters(param)
    vis.update_geometry(line_set)
    vis.update_renderer()

if __name__ == "__main__":
    dir_path ="data\\object"
    # dir_path = Path(args.path_dataset)
    index = args.index
    index = int(index)
    # split = "kitti"
    split = "training"
    dataset = Kitti_Dataset(dir_path, split=split)

    vis = o3d.visualization.Visualizer()
    vis.create_window(width=771, height=867)

    obj = dataset.get_labels(index)
    img3_d = dataset.get_rgb(index)
    calib1 = dataset.get_calib(index)
    pc = dataset.get_pcs(index)
    print(img3_d.shape)
    point_cloud = o3d.geometry.PointCloud()

    point_cloud.points = o3d.utility.Vector3dVector(pc)
    point_cloud.paint_uniform_color([0, 121/255, 89/255])
    vis.add_geometry(point_cloud)
    render_option = vis.get_render_option()
    render_option.line_width = 4

    for obj_index in range(len(obj)):
        if obj[obj_index].name == "Car" or obj[obj_index].name == "Pedestrian" or obj[obj_index].name == "Cyclist":
            # 阈值设置 ioc 
            # 如果需要显示自己的trainninglabel结果,需要取消这样的注释,并取消object3d.py最后一行的注释
            #if (obj[obj_index].name == "Car" and obj[obj_index].ioc >= 0.7) or  obj[obj_index].ioc > 0.5:
                R = rot_y(obj[obj_index].rotation_y)
                h, w, l = obj[obj_index].dimensions[0], obj[obj_index].dimensions[1], obj[obj_index].dimensions[2]
                x = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]
                y = [0, 0, 0, 0, -h, -h, -h, -h]
                # y = [h / 2, h / 2, h / 2, h / 2, -h / 2, -h / 2, -h / 2, -h / 2]
                z = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]
                # 得到目标物体经过旋转之后的实际尺寸(得到其在相机坐标系下的实际尺寸)
                corner_3d = np.vstack([x, y, z])
                corner_3d = np.dot(R, corner_3d)

                # 将该物体移动到相机坐标系下的原点处(涉及到坐标的移动,直接相加就行)
                corner_3d[0, :] += obj[obj_index].location[0]
                corner_3d[1, :] += obj[obj_index].location[1]
                corner_3d[2, :] += obj[obj_index].location[2]
                corner_3d = np.vstack((corner_3d, np.zeros((1, corner_3d.shape[-1]))))
                corner_3d[-1][-1] = 1


                inv_Tr = np.zeros_like(calib1.Tr_velo_to_cam)
                inv_Tr[0:3, 0:3] = np.transpose(calib1.Tr_velo_to_cam[0:3, 0:3])
                inv_Tr[0:3, 3] = np.dot(-np.transpose(calib1.Tr_velo_to_cam[0:3, 0:3]), calib1.Tr_velo_to_cam[0:3, 3])

                Y = np.dot(inv_Tr, corner_3d)

                draw_3dframeworks(vis, Y)

    vis.run()

你运行后的点云渲染和视角都不佳,那是因为你并没有载入相应的renderoption.json和view.json.
如果有需要可以联系我,或者等后续讲解。

4.Opencv对img进行显示demo

opencv显示的效果如下,可以通过a,d按键控制索引切换图片,q键退出。

显示结果

下面是img_3dbox.py文件,该文件也需要通过命令行窗口运行。
python img_3dbox.py --path_dataset data\\object
img_3dbox.py为文件面, data\\object为要显示的数据文件夹上级目录相对位置。
import cv2
import numpy as np
from data.kitti_Dataset import Kitti_Dataset


from pathlib import Path
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--path_dataset', type=str, default=None, help='dir for the label data', required=True)
args = parser.parse_args()

# 根据偏航角计算旋转矩阵(逆时针旋转)
def rot_y(rotation_y):
    cos = np.cos(rotation_y)
    sin = np.sin(rotation_y)
    R = np.array([[cos, 0, sin], [0, 1, 0], [-sin, 0, cos]])
    return R

if __name__ == "__main__":
    # 读取的数据的文件夹
    # 文件夹分为三级 dir_path\ training or test \ calib,label,bin,image
    # G:\czq\tsinghua\label_test
    # dir_path = 'G:\\czq\\tsinghua\\2_caozhenqiang'
    # dir_path =Path(args.path_label)

    dir_path = Path(args.path_dataset)
    # 读取训练集文件夹
    split = "training"
    dataset = Kitti_Dataset(dir_path, split=split)

    k = 0
    img3_d = dataset.get_rgb(k)
    print(img3_d.shape)
    max_num = 100
    # 逐张读入图片
    while True:


        img3_d = dataset.get_rgb(k)

        calib = dataset.get_calib(k)

        # 获取标签数据
        obj = dataset.get_labels(k)

        # 逐个读入一副图片中的所有object的标签
        for num in range(len(obj)):
            if obj[num].name == "Car" or obj[num].name == "Pedestrian" or obj[num].name == "Cyclist":
            	#这一行为阈值用来过滤训练概率较低的object
                #if (obj[num].name == "Car" and obj[num].ioc >= 0.7) or obj[num].ioc > 0.5:

                	# step1 得到rot_y旋转矩阵 3*3
                    R = rot_y(obj[num].rotation_y)
                    # 读取obect物体的高宽长信息
                    h, w, l = obj[num].dimensions[0], obj[num].dimensions[1], obj[num].dimensions[2]

                    # step2
                    # 得到该物体的坐标以底面为原点中心所在的物体坐标系下各个点的坐标
                    #     7 -------- 4
                    #    /|         /|
                    #   6 -------- 5 .
                    #   | |        | |
                    #   . 3 -------- 0
                    #   |/   .- - -|/ - - -> (x)
                    #   2 ---|----- 1
                    #        |
                    #        | (y)
                    x = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]
                    y = [0, 0, 0, 0, -h, -h, -h, -h]
                    z = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]
                    # 将xyz转化成3*8的矩阵
                    corner_3d = np.vstack([x, y, z])
                    # R * X
                    corner_3d = np.dot(R, corner_3d)

                    # 将该物体移动到相机坐标系下的原点处(涉及到坐标的移动,直接相加就行)
                    corner_3d[0, :] += obj[num].location[0]
                    corner_3d[1, :] += obj[num].location[1]
                    corner_3d[2, :] += obj[num].location[2]

                    # 将3d的bbox转换到2d坐标系中(需要用到内参矩阵)
                    corner_3d = np.vstack((corner_3d, np.zeros((1, corner_3d.shape[-1]))))
                    corner_2d = np.dot(calib.P2, corner_3d)
                    # 在像素坐标系下,横坐标x = corner_2d[0, :] /= corner_2d[2, :]
                    # 纵坐标的值以此类推
                    corner_2d[0, :] /= corner_2d[2, :]
                    corner_2d[1, :] /= corner_2d[2, :]

                    corner_2d = np.array(corner_2d, dtype=np.int)

                    # 绘制立方体边界框
                    color = [0, 255, 0]
                    # 线宽
                    thickness = 2

                    #绘制3d框
                    for corner_i in range(0, 4):
                        i, j = corner_i, (corner_i + 1) % 4
                        cv2.line(img3_d, (corner_2d[0, i], corner_2d[1, i]), (corner_2d[0, j], corner_2d[1, j]), color, thickness)
                        i, j = corner_i + 4, (corner_i + 1) % 4 + 4
                        cv2.line(img3_d, (corner_2d[0, i], corner_2d[1, i]), (corner_2d[0, j], corner_2d[1, j]), color, thickness)
                        i, j = corner_i, corner_i + 4
                        cv2.line(img3_d, (corner_2d[0, i], corner_2d[1, i]), (corner_2d[0, j], corner_2d[1, j]), color, thickness)


                    cv2.line(img3_d,(corner_2d[0, 0],corner_2d[1, 0]), (corner_2d[0, 5], corner_2d[1, 5]),color, thickness)
                    cv2.line(img3_d, (corner_2d[0, 1], corner_2d[1, 1]), (corner_2d[0, 4], corner_2d[1, 4]), color, thickness)
        cv2.imshow("{}".format(k), img3_d, )
        cv2.moveWindow("{}", 300, 50)
        key = cv2.waitKey(100) & 0xFF
        if key == ord('d'):
            k += 1
            cv2.destroyAllWindows()
            # if idx == 104:
            #     idx += 1
        if key == ord('a'):
            k -= 1

        if key == ord('q'):
            break
        if k >= max_num:
            k = max_num - 1
        if k < 0:
            k = 0
        # 读入图片信息


        # cv2.destroyAllWindows()

后续内容介绍

  • 上述代码讲解
  • 点云渲染介绍
  • 视角介绍
  • open3d介绍

参考

Open3d官方文档
各种open3d教程
kitti类解析


版权声明:本文为weixin_44491667原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。