您当前的位置: 首页 > 

行人检测0-09:LFFD-源码无死角解析(4)-预测代码解析

发布时间:2019-10-28 10:26:50 ,浏览量:6

以下链接是个人关于LFFD(行人检测)所有见解,如有错误欢迎大家指出,我会第一时间纠正。有兴趣的朋友可以加微信:17575010159 相互讨论技术。若是帮助到了你什么,一定要记得点赞!因为这是对我最大的鼓励。文末附带公众号 - 海量资源。

行人检测0-00:LFFD-史上最新无死角详细解读:https://blog.csdn.net/weixin_43013761/article/details/102592374

代码注解

在前面的博客中,我们已经运行了 pedestrian_detection/accuracy_evaluation/predict.py 程序,但是没有对它进行解析,现在我们就来分析一下吧(如果注释没有看懂,可以到末尾看总结提示):

# coding: utf-8
import sys
import os
import numpy
import cv2

sys.path.append('../')


class DataBatch:
    """Empty container whose attributes (e.g. ``data``) are attached dynamically."""
    pass


def NMS(boxes, overlap_threshold):
    """Greedy non-maximum suppression over scored boxes.

    The highest-scored remaining box is picked, then every remaining box whose
    overlap with it exceeds ``overlap_threshold`` is discarded; this repeats
    until no boxes remain.

    Note that "overlap" here is NOT IoU: it is the intersection area (computed
    with a ``+ 1`` pixel convention) divided by the area of the *other*,
    not-yet-picked box (computed without ``+ 1``). This is kept as-is because
    changing it would change detection results.

    :param boxes: numpy nx5, n is the number of boxes,
        0:4 -> x1, y1, x2, y2, 4 -> score
    :param overlap_threshold: boxes whose overlap with a picked box is
        strictly greater than this value are suppressed
    :return: the picked rows of ``boxes`` (float32), in descending score
        order; ``boxes`` itself is returned unchanged when it is empty
    """
    if boxes.shape[0] == 0:
        return boxes

    # If the bounding boxes are integers, convert them to floats --
    # this is important since we'll be doing a bunch of divisions.
    if boxes.dtype != numpy.float32:
        boxes = boxes.astype(numpy.float32)

    # Indexes of the boxes that survive suppression.
    pick = []

    # Grab the coordinates and scores of the bounding boxes.
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    sc = boxes[:, 4]
    widths = x2 - x1
    heights = y2 - y1

    # Compute the area of each box and sort the boxes by score, ascending,
    # so that the best remaining box is always the last index.
    area = heights * widths
    idxs = numpy.argsort(sc)

    # Keep looping while some indexes still remain in the index list.
    while len(idxs) > 0:
        # Take the highest-scored remaining box and keep it.
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        # Intersection of the picked box with every other remaining box.
        xx1 = numpy.maximum(x1[i], x1[idxs[:last]])
        yy1 = numpy.maximum(y1[i], y1[idxs[:last]])
        xx2 = numpy.minimum(x2[i], x2[idxs[:last]])
        yy2 = numpy.minimum(y2[i], y2[idxs[:last]])

        # Width and height of the intersection box (0 when disjoint).
        w = numpy.maximum(0, xx2 - xx1 + 1)
        h = numpy.maximum(0, yy2 - yy1 + 1)

        # Ratio of the intersection to the area of each other box.
        overlap = (w * h) / area[idxs[:last]]

        # Drop the picked box and every box that overlaps it too much.
        idxs = numpy.delete(idxs, numpy.concatenate(([last], numpy.where(overlap > overlap_threshold)[0])))

    # Return only the bounding boxes that were picked.
    return boxes[pick]


class Predict(object):
    """Loads an MXNet symbol/parameter pair and runs multi-scale box prediction."""

    def __init__(self,
                 mxnet,
                 symbol_file_path,
                 model_file_path,
                 ctx,
                 receptive_field_list,
                 receptive_field_stride,
                 bbox_small_list,
                 bbox_large_list,
                 receptive_field_center_start,
                 num_output_scales):
        """Store the configuration and load the model.

        :param mxnet: the imported ``mxnet`` module (injected by the caller)
        :param symbol_file_path: path of the network symbol file
        :param model_file_path: path of the trained parameter file
        :param ctx: MXNet context the model runs on
        :param receptive_field_list: per-scale receptive field sizes
        :param receptive_field_stride: per-scale receptive field strides
        :param bbox_small_list: per-scale lower box-size bounds (stored only)
        :param bbox_large_list: per-scale upper box-size bounds (stored only)
        :param receptive_field_center_start: per-scale coordinate of the
            first receptive field center
        :param num_output_scales: number of output scale branches

        The bracketed values below are the article author's notes for the
        configuration used in this script; they come from the config module,
        which is not visible here -- TODO confirm.
        """
        self.mxnet = mxnet
        self.symbol_file_path = symbol_file_path
        self.model_file_path = model_file_path
        self.ctx = ctx

        # author's note: [60, 100, 180, 320]
        self.receptive_field_list = receptive_field_list
        # author's note: [8, 16, 32, 64]
        self.receptive_field_stride = receptive_field_stride
        # author's note: [30, 60, 100, 180]
        self.bbox_small_list = bbox_small_list
        # author's note: [60, 100, 180, 320]
        self.bbox_large_list = bbox_large_list
        # author's note: [7, 15, 31, 63]
        self.receptive_field_center_start = receptive_field_center_start
        self.num_output_scales = num_output_scales

        # Half of each receptive field size. predict() multiplies the
        # network's box outputs by this value to turn them into pixel offsets
        # from the receptive field center.
        self.constant = [i / 2.0 for i in self.receptive_field_list]
        # Height and width of the input the module is initially bound to.
        self.input_height = 480
        self.input_width = 640

        # Bind the module with batch size 1 and load the parameters.
        self.__load_model()

    def __load_model(self):
        """Load the symbol and parameters; exit the process if a file is missing."""
        print('----> load symbol file: %s\n----> load model file: %s' % (self.symbol_file_path, self.model_file_path))
        if not os.path.exists(self.symbol_file_path):
            print('The symbol file does not exist!!!!')
            sys.exit(1)
        if not os.path.exists(self.model_file_path):
            print('The model file does not exist!!!!')
            sys.exit(1)

        self.symbol_net = self.mxnet.symbol.load(self.symbol_file_path)
        data_name = 'data'
        data_name_shape = (data_name, (1, 3, self.input_height, self.input_width))
        self.module = self.mxnet.module.Module(symbol=self.symbol_net,
                                               data_names=[data_name],
                                               label_names=None,
                                               context=self.ctx,
                                               work_load_list=None)
        self.module.bind(data_shapes=[data_name_shape], for_training=False)

        # Saved keys have the form "arg:<name>" or "aux:<name>"; split them
        # into the two dictionaries that init_params expects.
        save_dict = self.mxnet.nd.load(self.model_file_path)
        self.arg_name_arrays = dict()
        self.arg_name_arrays['data'] = self.mxnet.nd.zeros((1, 3, self.input_height, self.input_width), self.ctx)
        self.aux_name_arrays = {}
        for k, v in save_dict.items():
            tp, name = k.split(':', 1)
            if tp == 'arg':
                self.arg_name_arrays.update({name: v.as_in_context(self.ctx)})
            if tp == 'aux':
                self.aux_name_arrays.update({name: v.as_in_context(self.ctx)})
        self.module.init_params(arg_params=self.arg_name_arrays,
                                aux_params=self.aux_name_arrays,
                                allow_missing=True)
        print('----> Model is loaded successfully.')

    def predict(self, image, resize_scale=1., score_threshold=0.8, top_k=100, NMS_threshold=0.3, NMS_flag=True,
                skip_scale_branch_list=None, show_score_maps=True):
        """Detect boxes in one image.

        :param image: HxWx3 numpy image
        :param resize_scale: factor applied to the image before inference; it
            is raised when needed so that the shorter side is at least 128
        :param score_threshold: only locations whose score is strictly greater
            than this value produce a box
        :param top_k: maximum number of boxes kept (highest scores first)
            before NMS
        :param NMS_threshold: overlap threshold passed to NMS()
        :param NMS_flag: whether to run NMS on the collected boxes
        :param skip_scale_branch_list: indexes of scale branches to ignore;
            ``None`` (the default) skips nothing
        :param show_score_maps: when True (the default, matching the original
            behaviour) each scale's score map is shown with cv2.imshow and the
            call blocks in cv2.waitKey() until a key is pressed; pass False
            for unattended or headless runs
        :return: ``None`` if ``image`` is missing or is not a 3-channel image;
            otherwise a list of ``(x1, y1, x2, y2, score)`` tuples when
            ``NMS_flag`` is true, or a float32 numpy array of the same rows
            when it is false
        """
        # Reject missing images (e.g. a failed cv2.imread) and anything that
        # is not HxWx3.
        if image is None or image.ndim != 3 or image.shape[2] != 3:
            print('Only RGB images are supported.')
            return None

        if skip_scale_branch_list is None:
            skip_scale_branch_list = ()

        # Boxes collected from all scale branches.
        bbox_collection = []

        # Make sure the shorter side is at least 128 pixels after resizing.
        shorter_side = min(image.shape[:2])
        if shorter_side * resize_scale < 128:
            resize_scale = float(128) / shorter_side

        input_image = cv2.resize(image, (0, 0), fx=resize_scale, fy=resize_scale)
        input_image = input_image.astype(dtype=numpy.float32)
        # HxWxC -> 1xCxHxW: the network input needs four dimensions.
        input_image = input_image[:, :, :, numpy.newaxis]
        input_image = input_image.transpose([3, 2, 0, 1])

        # DataBatch is an empty holder; forward() only needs its ``data``.
        data_batch = DataBatch()
        data_batch.data = [self.mxnet.ndarray.array(input_image, self.ctx)]

        self.module.forward(data_batch=data_batch, is_train=False)
        # Each scale branch contributes two consecutive outputs: a score map
        # followed by a 4-channel box map. Author's note for a 480x640 input
        # (not verifiable here): score maps of 59x79, 29x39, 14x19 and 6x9.
        results = self.module.get_outputs()
        outputs = []
        for output in results:
            outputs.append(output.asnumpy())

        # Process every scale branch separately.
        for i in range(self.num_output_scales):
            if i in skip_scale_branch_list:
                continue

            # Drop the batch and channel axes, leaving rows x cols.
            score_map = numpy.squeeze(outputs[i * 2], (0, 1))

            if show_score_maps:
                # Map the score map to 0..255 and display it (blocks on a key).
                score_map_show = score_map * 255
                score_map_show[score_map_show < 0] = 0
                score_map_show[score_map_show > 255] = 255
                cv2.imshow('score_map' + str(i),
                           cv2.resize(score_map_show.astype(dtype=numpy.uint8), (0, 0), fx=2, fy=2))
                cv2.waitKey()

            # Drop the batch axis, leaving 4 x rows x cols.
            bbox_map = numpy.squeeze(outputs[i * 2 + 1], 0)

            # X coordinate of each receptive field center, then tiled so that
            # every row of the matrix is identical.
            RF_center_Xs = numpy.array([self.receptive_field_center_start[i] + self.receptive_field_stride[i] * x
                                        for x in range(score_map.shape[1])])
            RF_center_Xs_mat = numpy.tile(RF_center_Xs, [score_map.shape[0], 1])
            # Y coordinate of each receptive field center, then tiled so that
            # every column of the matrix is identical.
            RF_center_Ys = numpy.array([self.receptive_field_center_start[i] + self.receptive_field_stride[i] * y
                                        for y in range(score_map.shape[0])])
            RF_center_Ys_mat = numpy.tile(RF_center_Ys, [score_map.shape[1], 1]).T

            # The box map holds offsets in units of half a receptive field;
            # scale them to pixels and subtract them from the centers to get
            # the left-top and right-bottom corners.
            x_lt_mat = RF_center_Xs_mat - bbox_map[0, :, :] * self.constant[i]
            y_lt_mat = RF_center_Ys_mat - bbox_map[1, :, :] * self.constant[i]
            x_rb_mat = RF_center_Xs_mat - bbox_map[2, :, :] * self.constant[i]
            y_rb_mat = RF_center_Ys_mat - bbox_map[3, :, :] * self.constant[i]

            # Map back to the original image scale; clamp left/top corners at
            # 0 and right/bottom corners at the image width/height.
            x_lt_mat = x_lt_mat / resize_scale
            x_lt_mat[x_lt_mat < 0] = 0
            y_lt_mat = y_lt_mat / resize_scale
            y_lt_mat[y_lt_mat < 0] = 0
            x_rb_mat = x_rb_mat / resize_scale
            x_rb_mat[x_rb_mat > image.shape[1]] = image.shape[1]
            y_rb_mat = y_rb_mat / resize_scale
            y_rb_mat[y_rb_mat > image.shape[0]] = image.shape[0]

            # Keep the locations whose score exceeds the threshold.
            select_index = numpy.where(score_map > score_threshold)
            for idx in range(select_index[0].size):
                bbox_collection.append((x_lt_mat[select_index[0][idx], select_index[1][idx]],
                                        y_lt_mat[select_index[0][idx], select_index[1][idx]],
                                        x_rb_mat[select_index[0][idx], select_index[1][idx]],
                                        y_rb_mat[select_index[0][idx], select_index[1][idx]],
                                        score_map[select_index[0][idx], select_index[1][idx]]))

        # Sort by score, highest first, and keep at most top_k boxes.
        bbox_collection = sorted(bbox_collection, key=lambda item: item[-1], reverse=True)
        if len(bbox_collection) > top_k:
            bbox_collection = bbox_collection[0:top_k]
        bbox_collection_numpy = numpy.array(bbox_collection, dtype=numpy.float32)

        # Optionally remove overlapping boxes.
        if NMS_flag:
            final_bboxes = NMS(bbox_collection_numpy, NMS_threshold)
            final_bboxes_ = []
            for i in range(final_bboxes.shape[0]):
                final_bboxes_.append((final_bboxes[i, 0], final_bboxes[i, 1], final_bboxes[i, 2],
                                      final_bboxes[i, 3], final_bboxes[i, 4]))
            return final_bboxes_
        else:
            return bbox_collection_numpy


def _draw_bboxes(im, bboxes):
    """Draw each (x1, y1, x2, y2, ...) box on ``im`` in place as a green rectangle."""
    for bbox in bboxes:
        # predict() yields float32 coordinates; cv2.rectangle needs integers.
        cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)


def run_prediction_pickle():
    """Predict on the samples of a pickle data set, then show and save each result."""
    from config_farm import configuration_30_320_20L_4scales_v1 as cfg
    import mxnet

    # Pickle file holding the test data.
    data_pickle_file_path = '../data_provider_farm/data_folder/data_list_caltech_test_source.pkl'
    from data_provider_farm.pickle_provider import PickleProvider
    pickle_provider = PickleProvider(data_pickle_file_path)

    # Indexes of the positive and negative samples.
    positive_index = pickle_provider.positive_index
    negative_index = pickle_provider.negative_index

    # Only the positive samples are evaluated.
    all_index = positive_index  # +negative_index
    print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index)))

    import random
    random.shuffle(all_index)

    # Network symbol and trained parameters.
    symbol_file_path = '../symbol_farm/symbol_30_320_20L_4scales_v1_deploy.json'
    model_file_path = '../saved_model/configuration_30_320_20L_4scales_v1_2019-09-11-19-25-41/train_30_320_20L_4scales_v1_iter_500000.params'
    my_predictor = Predict(mxnet=mxnet,
                           symbol_file_path=symbol_file_path,
                           model_file_path=model_file_path,
                           ctx=mxnet.gpu(0),
                           receptive_field_list=cfg.param_receptive_field_list,
                           receptive_field_stride=cfg.param_receptive_field_stride,
                           bbox_small_list=cfg.param_bbox_small_list,
                           bbox_large_list=cfg.param_bbox_large_list,
                           receptive_field_center_start=cfg.param_receptive_field_center_start,
                           num_output_scales=cfg.param_num_output_scales)

    # cv2.imwrite fails silently when the target directory is missing.
    if not os.path.isdir('./test_images'):
        os.makedirs('./test_images')

    for idx in all_index:
        # Image and ground-truth boxes of this sample (the latter is unused).
        im, _, bboxes_gt = pickle_provider.read_by_index(idx)

        bboxes = my_predictor.predict(im, resize_scale=1, score_threshold=0.5, top_k=10000, NMS_threshold=0.5)
        if bboxes is None:
            # predict() rejected the image; nothing to draw.
            continue

        # Draw the predicted boxes on the image.
        _draw_bboxes(im, bboxes)

        cv2.imshow('im', im)
        cv2.waitKey()
        cv2.imwrite('./test_images/'+str(idx)+'.jpg', im)


def run_prediction_folder():
    """Predict on every jpg/png file found in ./test_images."""
    from config_farm import configuration_30_320_20L_4scales_v1 as cfg
    import mxnet

    debug_folder = './test_images'
    file_name_list = [file_name for file_name in os.listdir(debug_folder)
                      if file_name.lower().endswith('jpg') or file_name.lower().endswith('png')]

    symbol_file_path = '../symbol_farm/symbol_30_320_20L_4scales_v1_deploy.json'
    model_file_path = '../saved_model/configuration_30_320_20L_4scales_v1_2019-09-11-19-25-41/train_30_320_20L_4scales_v1_iter_500000.params'
    my_predictor = Predict(mxnet=mxnet,
                           symbol_file_path=symbol_file_path,
                           model_file_path=model_file_path,
                           ctx=mxnet.gpu(0),
                           receptive_field_list=cfg.param_receptive_field_list,
                           receptive_field_stride=cfg.param_receptive_field_stride,
                           bbox_small_list=cfg.param_bbox_small_list,
                           bbox_large_list=cfg.param_bbox_large_list,
                           receptive_field_center_start=cfg.param_receptive_field_center_start,
                           num_output_scales=cfg.param_num_output_scales)

    for file_name in file_name_list:
        im = cv2.imread(os.path.join(debug_folder, file_name))
        if im is None:
            # cv2.imread returns None for unreadable or corrupt files.
            continue

        bboxes = my_predictor.predict(im, resize_scale=1, score_threshold=0.5, top_k=10000, NMS_threshold=0.5,
                                      NMS_flag=True, skip_scale_branch_list=[])
        if bboxes is None:
            continue

        _draw_bboxes(im, bboxes)

        # Shrink very large images so that the longer side is 1440 pixels.
        if max(im.shape[:2]) > 1440:
            # 1440.0 keeps this a true division under Python 2 as well.
            scale = 1440.0/max(im.shape[:2])
            im = cv2.resize(im, (0, 0), fx=scale, fy=scale)
        # cv2.imshow('im', im)
        # cv2.waitKey()


if __name__ == '__main__':
    run_prediction_pickle()
    # run_prediction_folder()

感觉不是很重要的东西,我就没有注释了,在这里给大家做一下总结和提示:

1. 预测有四个分支,对应四个尺寸
2. 每个分支有两个结果,一个为mask(同时表示人脸置信度),一个为box偏移值
3. 把没有必要的预测结果设置为0,再挑选出置信度高的前100个box
4. 做NMS(非极大值抑制计算)

在这里插入图片描述

关注
打赏
1688896170
查看更多评论

暂无认证

  • 6浏览

    0关注

    115984博文

    0收益

  • 0浏览

    0点赞

    0打赏

    0留言

私信
关注
热门博文
立即登录/注册

微信扫码登录

0.1075s