行人检测0-09：LFFD-源码无死角解析(4)-预测代码解析

以下链接是个人关于LFFD(行人检测)所有见解，如有错误欢迎大家指出，我会第一时间纠正。有兴趣的朋友可以加微信：17575010159 相互讨论技术。若是帮助到了你什么，一定要记得点赞！因为这是对我最大的鼓励。文末附带公众号 - 海量资源。

行人检测0-00：LFFD-史上最新无死角详细解读：https://blog.csdn.net/weixin_43013761/article/details/102592374

代码注解

在前面的博客中，我们已经运行了pedestrian_detection/accuracy_evaluation/predict.py程序，但是没有对它进行解析，现在我们就来分析一下吧（如果注释没有看懂，到末尾看总结提示）：

# coding: utf-8
import sys
import os
import numpy
import cv2
import sys
sys.path.append('../')
# Bare container: callers attach whatever attributes they need at runtime.
class DataBatch:
    """Empty data-batch class used only as a holder for dynamic attributes."""


def NMS(boxes, overlap_threshold):
    """Greedy non-maximum suppression over scored boxes.

    Boxes are visited from the highest score to the lowest. Each visited box
    is kept, and every remaining box whose intersection with it covers more
    than ``overlap_threshold`` of that remaining box's own area is dropped.
    Note that the ratio is intersection / area-of-the-other-box, not IoU.

    :param boxes: numpy array of shape n x 5; columns 0:4 are x1, y1, x2, y2
        and column 4 is the score.
    :param overlap_threshold: overlap ratio above which a lower-scored box is
        suppressed.
    :return: the kept rows of ``boxes`` as float32, ordered by descending
        score. An empty input is returned unchanged.
    """
    if boxes.shape[0] == 0:
        return boxes

    # Work in floating point: the overlap ratio below is a division.
    if boxes.dtype != numpy.float32:
        boxes = boxes.astype(numpy.float32)

    # Split the columns once so the loop only does index lookups.
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    sc = boxes[:, 4]

    # Use the same inclusive "+1" convention as the intersection computed in
    # the loop. Without it the ratio is inflated (it can exceed 1) and a
    # zero-width or zero-height box triggers a division by zero.
    area = (x2 - x1 + 1) * (y2 - y1 + 1)

    # Ascending sort by score, so the best remaining box is always last.
    idxs = numpy.argsort(sc)

    # Indexes of the boxes that survive suppression.
    pick = []
    while len(idxs) > 0:
        # Take the highest-scored remaining box and keep it.
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        # Intersection rectangle between the kept box and every other
        # remaining box.
        xx1 = numpy.maximum(x1[i], x1[idxs[:last]])
        yy1 = numpy.maximum(y1[i], y1[idxs[:last]])
        xx2 = numpy.minimum(x2[i], x2[idxs[:last]])
        yy2 = numpy.minimum(y2[i], y2[idxs[:last]])

        # Clamp at zero so disjoint boxes contribute no overlap.
        w = numpy.maximum(0, xx2 - xx1 + 1)
        h = numpy.maximum(0, yy2 - yy1 + 1)

        # Fraction of each remaining box covered by the kept box.
        overlap = (w * h) / area[idxs[:last]]

        # Remove the kept box itself plus everything it suppresses.
        idxs = numpy.delete(idxs, numpy.concatenate(([last], numpy.where(overlap > overlap_threshold)[0])))

    return boxes[pick]


class Predict(object):

    def __init__(self,
                 mxnet,
                 symbol_file_path,
                 model_file_path,
                 ctx,
                 receptive_field_list,
                 receptive_field_stride,
                 bbox_small_list,
                 bbox_large_list,
                 receptive_field_center_start,
                 num_output_scales
                 ):
        """Store the detector configuration and load the network.

        :param mxnet: the imported ``mxnet`` module, kept on the instance and
            used for all framework calls.
        :param symbol_file_path: path of the network symbol file.
        :param model_file_path: path of the saved parameter file.
        :param ctx: device context handed to the module and to the arrays.
        :param receptive_field_list: per-branch receptive field sizes
            (the original notes give [60, 100, 180, 320] as an example).
        :param receptive_field_stride: per-branch strides
            (example from the original notes: [8, 16, 32, 64]).
        :param bbox_small_list: per-branch lower box sizes
            (example from the original notes: [30, 60, 100, 180]).
        :param bbox_large_list: per-branch upper box sizes
            (example from the original notes: [60, 100, 180, 320]).
        :param receptive_field_center_start: per-branch first receptive field
            centre (example from the original notes: [7, 15, 31, 63]).
        :param num_output_scales: number of output scales; presumably one per
            branch -- confirm against the configuration in use.
        """
        # Framework handle, file locations and device.
        self.mxnet, self.ctx = mxnet, ctx
        self.symbol_file_path = symbol_file_path
        self.model_file_path = model_file_path

        # Per-branch geometry taken verbatim from the configuration.
        self.receptive_field_list = receptive_field_list
        self.receptive_field_stride = receptive_field_stride
        self.bbox_small_list = bbox_small_list
        self.bbox_large_list = bbox_large_list
        self.receptive_field_center_start = receptive_field_center_start
        self.num_output_scales = num_output_scales

        # Half of each receptive field size. Per the original author's note
        # this is needed when mapping predictions back to image coordinates.
        self.constant = [rf_size / 2.0 for rf_size in receptive_field_list]

        # Input height and width used when binding the module.
        self.input_height, self.input_width = 480, 640

        # Bind the module for a batch of one image and load the weights.
        self.__load_model()

    # 模型加载
    def __load_model(self):
        """Load the symbol and parameter files and bind an inference module.

        Exits the process with status 1 if either file is missing. On success
        it sets ``symbol_net``, ``module``, ``arg_name_arrays`` and
        ``aux_name_arrays`` on the instance.
        """
        print('----> load symbol file: %s\n----> load model file: %s' % (self.symbol_file_path, self.model_file_path))

        # Fail fast with a readable message when a required file is missing.
        if not os.path.exists(self.symbol_file_path):
            print('The symbol file does not exist!!!!')
            sys.exit(1)
        if not os.path.exists(self.model_file_path):
            print('The model file does not exist!!!!')
            sys.exit(1)

        # One image per batch, three channels, fixed height and width.
        input_shape = (1, 3, self.input_height, self.input_width)
        input_name = 'data'

        self.symbol_net = self.mxnet.symbol.load(self.symbol_file_path)
        self.module = self.mxnet.module.Module(symbol=self.symbol_net,
                                               data_names=[input_name],
                                               label_names=None,
                                               context=self.ctx,
                                               work_load_list=None)
        self.module.bind(data_shapes=[(input_name, input_shape)],
                         for_training=False)

        saved_params = self.mxnet.nd.load(self.model_file_path)
        self.arg_name_arrays = {'data': self.mxnet.nd.zeros(input_shape, self.ctx)}
        self.aux_name_arrays = {}

        # Saved keys have the form "<arg|aux>:<name>"; route each array to the
        # matching dictionary and ignore any other prefix.
        destinations = {'arg': self.arg_name_arrays, 'aux': self.aux_name_arrays}
        for saved_key, array in saved_params.items():
            prefix, param_name = saved_key.split(':', 1)
            destination = destinations.get(prefix)
            if destination is not None:
                destination[param_name] = array.as_in_context(self.ctx)

        self.module.init_params(arg_params=self.arg_name_arrays,
                                aux_params=self.aux_name_arrays,
                                allow_missing=True)
        print('----> Model is loaded successfully.')

    #
    def predict(self, image, resize_scale=1., score_threshold=0.8, top_k=100, NMS_threshold=0.3, NMS_flag=True, skip_scale_branch_list=[]):

        # 判断输入图像的维度和通道数，不符合条件则报错
        if image.ndim != 3 or image.shape[2] != 3:
            print('Only RGB images are supported.')
            return None

        # 收集所有预测出来的box
        bbox_collection = []

        # 把图片缩放到合理尺寸，原本长宽为480*680，现在选择最小的为480
        shorter_side = min(image.shape[:2])

        # 如果图像进行缩放后小于128
        if shorter_side * resize_scale  score_threshold)
            for idx in range(select_index[0].size):
                bbox_collection.append((x_lt_mat[select_index[0][idx], select_index[1][idx]],
                                        y_lt_mat[select_index[0][idx], select_index[1][idx]],
                                        x_rb_mat[select_index[0][idx], select_index[1][idx]],
                                        y_rb_mat[select_index[0][idx], select_index[1][idx]],
                                        score_map[select_index[0][idx], select_index[1][idx]]))

        # bbox_collection按照置信度（人脸概率）进行一个排序，
        bbox_collection = sorted(bbox_collection, key=lambda item: item[-1], reverse=True)

        # 只选出人脸概率高的前top_k
        if len(bbox_collection) > top_k:
            bbox_collection = bbox_collection[0:top_k]
        bbox_collection_numpy = numpy.array(bbox_collection, dtype=numpy.float32)

        # NMS，这个就不说了，就是一个多框去重操作
        if NMS_flag:
            final_bboxes = NMS(bbox_collection_numpy, NMS_threshold)
            final_bboxes_ = []
            for i in range(final_bboxes.shape[0]):
                final_bboxes_.append((final_bboxes[i, 0], final_bboxes[i, 1], final_bboxes[i, 2], final_bboxes[i, 3], final_bboxes[i, 4]))

            return final_bboxes_
        else:
            return bbox_collection_numpy


def run_prediction_pickle():
    """Run the detector on the positive samples of the test pickle.

    For each sample (visited in random order) the predicted boxes are drawn
    on the image, the image is shown in a window until a key is pressed, and
    the annotated image is written to ``./test_images/<idx>.jpg``.
    """
    from config_farm import configuration_30_320_20L_4scales_v1 as cfg
    import mxnet
    from data_provider_farm.pickle_provider import PickleProvider
    import random

    # Test-set pickle to read samples from.
    data_pickle_file_path = '../data_provider_farm/data_folder/data_list_caltech_test_source.pkl'
    pickle_provider = PickleProvider(data_pickle_file_path)

    # Index lists of the positive and negative samples.
    positive_index = pickle_provider.positive_index
    negative_index = pickle_provider.negative_index
    print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index)))

    # Only positives are evaluated. Copy the list so that shuffling does not
    # reorder the provider's own index list in place.
    all_index = list(positive_index)  # + list(negative_index)
    random.shuffle(all_index)

    # Network symbol file and trained parameter file.
    symbol_file_path = '../symbol_farm/symbol_30_320_20L_4scales_v1_deploy.json'
    model_file_path = '../saved_model/configuration_30_320_20L_4scales_v1_2019-09-11-19-25-41/train_30_320_20L_4scales_v1_iter_500000.params'

    my_predictor = Predict(mxnet=mxnet,
                           symbol_file_path=symbol_file_path,
                           model_file_path=model_file_path,
                           ctx=mxnet.gpu(0),
                           receptive_field_list=cfg.param_receptive_field_list,
                           receptive_field_stride=cfg.param_receptive_field_stride,
                           bbox_small_list=cfg.param_bbox_small_list,
                           bbox_large_list=cfg.param_bbox_large_list,
                           receptive_field_center_start=cfg.param_receptive_field_center_start,
                           num_output_scales=cfg.param_num_output_scales)

    # cv2.imwrite reports failure only through its return value, so make sure
    # the output directory exists before the loop.
    os.makedirs('./test_images', exist_ok=True)

    for idx in all_index:
        # Image pixels for this sample; the other returned fields are unused.
        im, _, _ = pickle_provider.read_by_index(idx)

        bboxes = my_predictor.predict(im, resize_scale=1, score_threshold=0.5, top_k=10000, NMS_threshold=0.5)
        if bboxes is None:
            # predict() returns None for images that are not 3-channel.
            continue

        # Box coordinates are floats; OpenCV drawing functions take integer
        # pixel positions.
        for bbox in bboxes:
            top_left = (int(round(bbox[0])), int(round(bbox[1])))
            bottom_right = (int(round(bbox[2])), int(round(bbox[3])))
            cv2.rectangle(im, top_left, bottom_right, (0, 255, 0), 2)

        cv2.imshow('im', im)
        cv2.waitKey()
        cv2.imwrite('./test_images/' + str(idx) + '.jpg', im)


def run_prediction_folder():
    """Run the detector on every jpg/png image found in ``./test_images``.

    Predicted boxes are drawn on each image and images whose longer side
    exceeds 1440 pixels are scaled down. Displaying the result is disabled
    in this script, so the function currently has no visible output.
    """
    from config_farm import configuration_30_320_20L_4scales_v1 as cfg
    import mxnet

    debug_folder = './test_images'
    file_name_list = [file_name for file_name in os.listdir(debug_folder)
                      if file_name.lower().endswith(('jpg', 'png'))]

    symbol_file_path = '../symbol_farm/symbol_30_320_20L_4scales_v1_deploy.json'
    model_file_path = '../saved_model/configuration_30_320_20L_4scales_v1_2019-09-11-19-25-41/train_30_320_20L_4scales_v1_iter_500000.params'
    my_predictor = Predict(mxnet=mxnet,
                           symbol_file_path=symbol_file_path,
                           model_file_path=model_file_path,
                           ctx=mxnet.gpu(0),
                           receptive_field_list=cfg.param_receptive_field_list,
                           receptive_field_stride=cfg.param_receptive_field_stride,
                           bbox_small_list=cfg.param_bbox_small_list,
                           bbox_large_list=cfg.param_bbox_large_list,
                           receptive_field_center_start=cfg.param_receptive_field_center_start,
                           num_output_scales=cfg.param_num_output_scales)

    for file_name in file_name_list:
        im = cv2.imread(os.path.join(debug_folder, file_name))
        if im is None:
            # cv2.imread returns None instead of raising when it cannot
            # decode a file; skip it rather than crash inside predict().
            print('Failed to read image: %s' % file_name)
            continue

        bboxes = my_predictor.predict(im, resize_scale=1, score_threshold=0.5, top_k=10000, NMS_threshold=0.5, NMS_flag=True, skip_scale_branch_list=[])
        if bboxes is None:
            # predict() returns None for images that are not 3-channel.
            continue

        # Box coordinates are floats; OpenCV drawing functions take integer
        # pixel positions.
        for bbox in bboxes:
            top_left = (int(round(bbox[0])), int(round(bbox[1])))
            bottom_right = (int(round(bbox[2])), int(round(bbox[3])))
            cv2.rectangle(im, top_left, bottom_right, (0, 255, 0), 2)

        # Shrink very large images so the longer side is at most 1440 pixels.
        if max(im.shape[:2]) > 1440:
            scale = 1440/max(im.shape[:2])
            im = cv2.resize(im, (0, 0), fx=scale, fy=scale)
        # Display is left disabled, as in the original script:
        # cv2.imshow('im', im)
        # cv2.waitKey()


# Script entry point: runs the pickle-based demo. The folder-based demo is
# left disabled; swap the two calls to use it instead.
if __name__ == '__main__':
    run_prediction_pickle()
    # run_prediction_folder()

感觉不是很重要的东西，我就没有注释了，在这里给大家做一下总结和提示：

1. 预测有四个分支，对应四个尺寸
2. 每个分支有两个结果，一个为mask（同时表示人脸置信度），一个为box偏移值
3. 把没有必要的预测结果设置为0，再挑选出置信度高的前100个box
4. 做NMS（非极大值抑制计算）

在这里插入图片描述

行人检测0-09：LFFD-源码无死角解析(4)-预测代码解析

最近更新

热门博客

[ 申请 ]友情链接：