YOLOV5.5-P5(1280)部署到OpenVINO<二、环境安装与性能验证>-526互联

YOLOV5.5-P5(640)部署到OpenVINO<一、环境安装与性能验证>

YOLOV5.5-P6(1280)部署到OpenVINO<二、环境安装与性能验证>

　　步骤和上一节差不多。

1、在yolov5.5 export.py中将yolov5s6.pt转为onnx

--weights yolov5s6.pt --img 1280 --batch 1

2、在网页版netron查看四个输出层，搜索Transpose就能看到

3、在openvino安装目录下执行mo.py

python mo.py --input_model yolov5s6.onnx --model_name yolov5s6 -s 255 --reverse_input_channels --output  Conv_307,Conv_323,Conv_339,Conv_355
python mo.py --input_model yolov5m6.onnx --model_name yolov5m6 -s 255 --reverse_input_channels --output  Conv_405,Conv_421,Conv_437,Conv_453

4、这里由于图像输入分辨率是1280*1280，并且输出四组特征图，所以和之前640分辨率的模型还是不一样，首先，先普及一个概念(以coco数据集为例子)：

yolov5-p5(640)

对于5s、5m、5l、5x模型，有九个anchors：

# anchors
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

　　而模型输出三组特征图：1*255*80*80、1*255*40*40、1*255*20*20，分别记为：P80、P40、P20，例如：P80中，本身包含3个候选框，和先验anchors的对应关系为：

P80 的三个候选框 -> [10,13, 16,30, 33,23] # P3/8
P40 的三个候选框 -> [30,61, 62,45, 59,119] # P4/16
P20 的三个候选框 -> [116,90, 156,198, 373,326] # P5/32

yolov5-p6(1280)　　

　　同理，如果知道p6(1280)模型的anchors：

(P6的配置文件“yolov5s6.yaml”在：源码hub目录下)

# anchors
anchors:
  - [ 19,27,  44,40,  38,94 ]  # P3/8
  - [ 96,68,  86,152,  180,137 ]  # P4/16
  - [ 140,301,  303,264,  238,542 ]  # P5/32
  - [ 436,615,  739,380,  925,792 ]  # P6/64

　　就可以知道，p6模型输出的四个特征图和先验anchors的对应关系：

P160 的三个候选框 -> [ 19,27, 44,40, 38,94 ] # P3/8
P80 的三个候选框 -> [ 96,68, 86,152, 180,137 ] # P4/16
P40 的三个候选框 -> [ 140,301, 303,264, 238,542 ] # P5/32
P20 的三个候选框 -> [ 436,615, 739,380, 925,792 ] # P6/64

以下是代码：

  1 # 命令行参数：-m data/yolov5m.xml -i D:/Data/dog.jpg -at yolov5 --labels data/coco.names -d CPU
  2 from __future__ import print_function, division
  3 import sys
  4 from time import time
  5 import numpy as np
  6 
  7 import ngraph
  8 import cv2
  9 from openvino.inference_engine import IENetwork, IECore
 10 
 11 labels = "data/coco.names"
 12 device = "CPU"
 13 img_file = "d:/Data/dog.jpg"
 14 model_xml = "data/yolov5s6.xml"
 15 prob_threshold = 0.25
 16 iou_threshold = 0.45
 17 in_res640 = 640
 18 in_res1280 = 1280  # 640 for 5s 5m 5l 5x
 19 
 20 class YoloParams:
 21     # ------------------------------------------- Extracting layer parameters ------------------------------------------
 22     # Magic numbers are copied from yolo samples
 23     def __init__(self, param, side):
 24         self.coords = 4 if 'coords' not in param else int(param['coords'])
 25         self.classes = 80 if 'classes' not in param else int(param['classes'])
 26         self.side = side
 27         self.num = 3
 28         # self.anchors = [10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0]
 29         self.anchors = [  19, 27,  44, 40,  38, 94,         # P3/8
 30                           96, 68,  86, 152,  180, 137,      # P4/16
 31                           140, 301,  303, 264,  238, 542,   # P5/32
 32                           436, 615,  739, 380,  925, 792 ]  # P6/64
 33 
 34 def letterbox(img, size=(in_res1280, in_res1280), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
 35     # Resize image to a 32-pixel-multiple rectangle（矩形推理） https://github.com/ultralytics/yolov3/issues/232
 36     shape = img.shape[:2]  # current shape [height, width]
 37     w, h = size
 38 
 39     # Scale ratio (new / old)
 40     r = min(h / shape[0], w / shape[1])
 41     if not scaleup:  # only scale down, do not scale up (for better test mAP)
 42         r = min(r, 1.0)
 43 
 44     # Compute padding
 45     ratio = r, r  # width, height ratios
 46     new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
 47     dw, dh = w - new_unpad[0], h - new_unpad[1]  # wh padding
 48     if auto:  # minimum rectangle
 49         dw, dh = np.mod(dw, 64), np.mod(dh, 64)  # wh padding
 50     elif scaleFill:  # stretch
 51         dw, dh = 0.0, 0.0
 52         new_unpad = (w, h)
 53         ratio = w / shape[1], h / shape[0]  # width, height ratios
 54 
 55     dw /= 2  # divide padding into 2 sides
 56     dh /= 2
 57 
 58     if shape[::-1] != new_unpad:  # resize
 59         img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
 60     top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
 61     left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
 62     img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
 63 
 64     top2, bottom2, left2, right2 = 0, 0, 0, 0
 65     if img.shape[0] != h:
 66         top2 = (h - img.shape[0]) // 2
 67         bottom2 = top2
 68         img = cv2.copyMakeBorder(img, top2, bottom2, left2, right2, cv2.BORDER_CONSTANT, value=color)  # add border
 69     elif img.shape[1] != w:
 70         left2 = (w - img.shape[1]) // 2
 71         right2 = left2
 72         img = cv2.copyMakeBorder(img, top2, bottom2, left2, right2, cv2.BORDER_CONSTANT, value=color)  # add border
 73     return img
 74 
 75 """
 76 @brief:
 77 x、y、height、width:网络最后三个尺度特征图对应在640*640 or 1280*1280图像上的先验眶
 78 class_id: 0-80
 79 confidence: Pr(object)*Pr(Classi|Object)
 80 im_h: 原始图高度
 81 im_w: 原始图宽度
 82 """
 83 def scale_bbox(x, y, height, width, class_id, confidence, im_h, im_w, resized_im_h=in_res1280, resized_im_w=in_res1280):
 84     gain = min(resized_im_w / im_w, resized_im_h / im_h)  # gain  = old / new
 85     pad = (resized_im_w - im_w * gain) / 2, (resized_im_h - im_h * gain) / 2  # wh padding
 86     x = int((x - pad[0]) / gain)
 87     y = int((y - pad[1]) / gain)
 88 
 89     w = int(width / gain)
 90     h = int(height / gain)
 91 
 92     xmin = max(0, int(x - w / 2))
 93     ymin = max(0, int(y - h / 2))
 94     xmax = min(im_w, int(xmin + w))
 95     ymax = min(im_h, int(ymin + h))
 96     # Method item() used here to convert NumPy types to native types for compatibility with functions, which don't
 97     # support Numpy types (e.g., cv2.rectangle doesn't support int64 in color parameter)
 98     return dict(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, class_id=class_id.item(), confidence=confidence.item())
 99 
100 
101 def entry_index(side, coord, classes, location, entry):
102     side_power_2 = side ** 2
103     n = location // side_power_2
104     loc = location % side_power_2
105     return int(side_power_2 * (n * (coord + classes + 1) + entry) + loc)
106 
107 
108 # 解析特征图，x y w h 坐标复原，类别置信度计算
109 def parse_yolo_region(blob, resized_image_shape, original_im_shape, params, threshold):
110     # ------------------------------------------ Validating output parameters ------------------------------------------
111     out_blob_n, out_blob_c, out_blob_h, out_blob_w = blob.shape
112     predictions = 1.0 / (1.0 + np.exp(-blob))  # sigmoid，是所有的值都要sigmoid
113 
114     assert out_blob_w == out_blob_h, "Invalid size of output blob. It sould be in NCHW layout and height should " \
115                                      "be equal to width. Current height = {}, current width = {}" \
116                                      "".format(out_blob_h, out_blob_w)
117 
118     # ------------------------------------------ Extracting layer parameters -------------------------------------------
119     orig_im_h, orig_im_w = original_im_shape
120     resized_image_h, resized_image_w = resized_image_shape  # 640 640 or 1280 1280
121     objects = list()
122 
123     side_square = params.side[1] * params.side[0]  # 80*80
124 
125     # ------------------------------------------- Parsing YOLO Region output -------------------------------------------
126     bbox_size = int(out_blob_c / params.num)  # 4+1+num_classes   255 / 3 = 85
127     # print('bbox_size = ' + str(bbox_size))
128     # print('bbox_size = ' + str(bbox_size))
129     for row, col, n in np.ndindex(params.side[0], params.side[1], params.num):  # 160, 160, 3 or 80, 80, 3; 40, 40, 3; 20, 20, 3
130         # 取出1*255*80*80 中的一个元素，大小为 85
131         bbox = predictions[0, n * bbox_size:(n + 1) * bbox_size, row, col]
132         # print("n * bbox_size = ", n * bbox_size)
133         # print("bbox_size = ", (n + 1) * bbox_size)
134 
135         x, y, width, height, object_probability = bbox[:5]
136 
137         if object_probability < threshold:
138             continue
139         class_probabilities = bbox[5:]
140         # print('resized_image_w = ' + str(resized_image_w))
141         # print('out_blob_w = ' + str(out_blob_w))
142 
143         x = (2 * x - 0.5 + col) * (resized_image_w / out_blob_w)
144         y = (2 * y - 0.5 + row) * (resized_image_h / out_blob_h)
145         # 下面的if elif判断语句(用于计算idx)可以拿到for循环外边！！！
146         if int(resized_image_w / out_blob_w) == 8 & int(resized_image_h / out_blob_h) == 8:  # 80x80,
147             idx = 0
148         elif int(resized_image_w / out_blob_w) == 16 & int(resized_image_h / out_blob_h) == 16:  # 40x40
149             idx = 1
150         elif int(resized_image_w / out_blob_w) == 32 & int(resized_image_h / out_blob_h) == 32:  # 20x20
151             idx = 2
152         elif int(resized_image_w / out_blob_w) == 64 & int(resized_image_h / out_blob_h) == 64:  # 20x20
153             idx = 3
154         elif int(resized_image_w / out_blob_w) == 128 & int(resized_image_h / out_blob_h) == 128:  # 20x20
155             idx = 4
156         # idx表示步距；n表示通道
157         # idx = 0, n = 0 1 2 -> 0 1, 2 3, 3 4;第一行anchors
158         # idx = 1, n = 0 1 2 ->               第二行anchors
159         # idx = 2, n = 0 1 2 ->               第三行anchors
160         # idx = 3, n = 0 1 2 ->               第四行anchors
161         # 结论：
162         # 对于1*160*160*255的特征图，每3个点(即：1*1*1*255)对应3个anchors，即：第一行anchors
163         # 对于1*80*80*255的特征图，  每3个点(即：1*1*1*255)对应3个anchors，即：第二行anchors
164         # 对于1*40*40*255的特征图，  每3个点(即：1*1*1*255)对应3个anchors，即：第三行anchors
165         # 对于1*20*20*255的特征图，  每3个点(即：1*1*1*255)对应3个anchors，即：第四行anchors
166         width = (2 * width) ** 2 * params.anchors[idx * 6 + 2 * n]
167         height = (2 * height) ** 2 * params.anchors[idx * 6 + 2 * n + 1]
168 
169         class_id = np.argmax(class_probabilities)  # max(Pr(classi|Object))
170         # confidence = class_probabilities[class_id]
171         confidence = class_probabilities[class_id] * bbox[4]  # sry 这里改成这个才对？？？？？？？？？？？？？？？？
172         objects.append(scale_bbox(x=x, y=y, height=height, width=width, class_id=class_id, confidence=confidence,
173                                   im_h=orig_im_h, im_w=orig_im_w, resized_im_h=resized_image_h,resized_im_w=resized_image_w))
174     return objects
175 
176 
177 # 计算IOU
178 def intersection_over_union(box_1, box_2):
179     # 计算交集
180     width_of_overlap_area = min(box_1['xmax'], box_2['xmax']) - max(box_1['xmin'], box_2['xmin'])
181     height_of_overlap_area = min(box_1['ymax'], box_2['ymax']) - max(box_1['ymin'], box_2['ymin'])
182     if width_of_overlap_area < 0 or height_of_overlap_area < 0:
183         area_of_overlap = 0
184     else:
185         area_of_overlap = width_of_overlap_area * height_of_overlap_area
186     # 计算并集
187     box_1_area = (box_1['ymax'] - box_1['ymin']) * (box_1['xmax'] - box_1['xmin'])
188     box_2_area = (box_2['ymax'] - box_2['ymin']) * (box_2['xmax'] - box_2['xmin'])
189     area_of_union = box_1_area + box_2_area - area_of_overlap
190     if area_of_union == 0:
191         return 0
192     return area_of_overlap / area_of_union
193 
194 
195 def main():
196     # ------------- 1. Plugin initialization for specified device and load extensions library if specified -------------
197     ie = IECore()
198     # -------------------- 2. Reading the IR generated by the Model Optimizer (.xml and .bin files) --------------------
199     net = ie.read_network(model=model_xml)
200     # ---------------------------------- 3. Load CPU extension for support specific layer ------------------------------
201     # ...
202     # ---------------------------------------------- 4. Preparing inputs -----------------------------------------------
203     input_blob = next(iter(net.input_info))
204     # sry
205     output_blob = list()
206     for output in iter(net.outputs):
207         output_blob.append(output)
208     #  Defaulf batch_size is 1
209     net.batch_size = 1
210     # Read and pre-process input images
211     n, c, h, w = net.inputs[input_blob].shape
212 
213     # api:https://docs.openvino.ai/latest/api/ngraph_python_api/_autosummary/ngraph.html#module-ngraph
214     ng_func = ngraph.function_from_cnn(net)  # 这里的计算图是转IR格式以后（优化以后），得到的新的优化版网络架构
215     yolo_layer_params = {}
216     for node in ng_func.get_ordered_ops():
217         layer_name = node.get_friendly_name()
218         # print(node, "||||||", layer_name)
219         if layer_name not in net.outputs:  # 拿到最后三个输出层
220             continue
221 
222         shape = list(node.inputs()[0].get_source_output().get_node().shape)
223         # yolo中的一些参数：先验框宽高、数量、类别数等
224         yolo_params = YoloParams(node._get_attributes(), shape[2:4])
225         yolo_layer_params[layer_name] = (shape, yolo_params)
226     # 读labels文件
227     with open(labels, 'r') as f:
228         labels_map = [x.strip() for x in f]
229     wait_key_code = 0
230     # ----------------------------------------- 5. Loading model to the plugin -----------------------------------------
231     exec_net = ie.load_network(network=net, device_name=device)
232     cost_time = 0
233     # ----------------------------------------------- 6. Doing inference -----------------------------------------------
234     frame = cv2.imread(img_file)
235     in_frame = letterbox(frame, (w, h))  # 这里没有用到矩形推理
236     # resize input_frame to network size
237     in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
238     in_frame = in_frame.reshape((n, c, h, w))
239 
240     # 推理&得到很多候选框
241     objects = list()
242     start_time = time()
243     # 1*80*80*255  1*40*40*255 1*20*20*255 (1*160*160*255)
244     output = exec_net.infer(inputs={input_blob: in_frame})
245     cost_time = time() - start_time
246     for layer_name, out_blob in output.items():
247         """
248         可以看到，目前就是anchors取值不对，因为有四个尺度的特征图，去看看yolo5.5的源码
249         另外注意，大图有关预测框的缩放也要注意！大不了拷贝过来啊！
250         """
251         # if out_blob.shape[2] == 20:
252         #     continue
253         layer_params = yolo_layer_params[layer_name]
254         out_blob.shape = layer_params[0]
255         objects += parse_yolo_region(out_blob, in_frame.shape[2:], frame.shape[:-1], layer_params[1], prob_threshold)
256 
257     # 从三个尺度的featureMap得到一堆候选框，对候选框进行NMS(这里的NSM比较简答粗暴，不仅慢，而且会导致漏检?)
258     objects = sorted(objects, key=lambda obj: obj['confidence'], reverse=True)  # 依据置信度，按从大到小进行排序
259     for i in range(len(objects)):
260         if objects[i]['confidence'] == 0:  # 这句有点扯，这里是浮点数
261             continue
262         for j in range(i + 1, len(objects)):
263             # 不分类别，仅按照IOU进行排序，凡是后续框和当前框(最大置信度)的IOU太大的，都删除
264             if intersection_over_union(objects[i], objects[j]) > iou_threshold:
265                 objects[j]['confidence'] = 0
266 
267     # 如果Pr(classi|Object) > threshold,则保留
268     objects = [obj for obj in objects if obj['confidence'] >= prob_threshold]
269     origin_im_size = frame.shape[:-1]
270     for obj in objects:
271         # Validation bbox of detected object
272         if obj['xmax'] > origin_im_size[1] or obj['ymax'] > origin_im_size[0] or obj['xmin'] < 0 or obj['ymin'] < 0:
273             continue
274         color = (int(max(255 - obj['class_id'] * 7.5, 0)), max(255 - obj['class_id'] * 10, 0), max(255 - obj['class_id'] * 12.5, 0))
275         det_label = labels_map[obj['class_id']] if labels_map and len(labels_map) >= obj['class_id'] else str(obj['class_id'])
276         cv2.rectangle(frame, (obj['xmin'], obj['ymin']), (obj['xmax'], obj['ymax']), color, 2)
277         cv2.putText(frame, "#" + det_label + ' ' + str(round(obj['confidence'] * 100, 1)) + ' %', (obj['xmin'], obj['ymin'] - 7), cv2.FONT_HERSHEY_COMPLEX, 0.6, color, 1)
278     # Draw performance stats over frame
279     cost_time_message = "inference time: {:.3f} ms".format(cost_time * 1e3)
280     cv2.putText(frame, cost_time_message, (15, 45), cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
281     cv2.imshow("DetectionResults", frame)
282     cv2.waitKey(0)
283     cv2.destroyAllWindows()
284 
285 if __name__ == '__main__':
286     sys.exit(main() or 0)

View Code

　　P6比P5慢，但是对于同一个目标置信度更高，recall也更高。