模型下载:
cd C:\OpenVINO\openvino_2020.4.287\deployment_tools\open_model_zoo\tools\downloader
执行:
python downloader.py --name face-detection-0102
下载好的模型文件在:C:\OpenVINO\openvino_2020.4.287\deployment_tools\open_model_zoo\tools\downloader\intel
一、单帧检测
注意代码42行,网络末端输出的是1*1*N*7大小的featureMap
import cv2 as cv
import numpy as np
from openvino.inference_engine import IECore


def ssd_demo():
    """Run the SSD detector on one image and show the annotated result.

    Reads the IR model ``ssdv2_graph.xml`` / ``ssdv2_graph.bin``, loads it on
    the CPU, runs a single synchronous inference on ``d:/3.png`` and draws
    every detection whose confidence exceeds 0.5.

    Raises:
        FileNotFoundError: if the input image cannot be read.
    """
    # 1. Read the model and create the executable network.
    ie = IECore()
    for device in ie.available_devices:
        print(device)
    model_xml = "ssdv2_graph.xml"
    model_bin = "ssdv2_graph.bin"
    net = ie.read_network(model=model_xml, weights=model_bin)
    # Names of the first input and the first output of the network.
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))
    n, c, h, w = net.inputs[input_blob].shape
    print(n, c, h, w)  # input shape; it is also listed near the top of the .xml
    exec_net = ie.load_network(network=net, device_name="CPU")

    # 2. Read the class-name file (one label per line).
    with open('object_detection_classes_coco.txt', encoding="utf-8") as f:
        labels = [line.strip() for line in f]

    # 3. Read and preprocess the image.
    image_path = "d:/3.png"
    src = cv.imread(image_path)
    if src is None:
        # cv.imread returns None instead of raising when the file is missing
        # or unreadable; fail here with a clear message.
        raise FileNotFoundError("cannot read image: " + image_path)
    image = cv.resize(src, (w, h))
    image = image.transpose(2, 0, 1)  # h, w, c -> c, h, w

    # 4. Inference and result parsing.
    # Wrapping the image in a list adds the batch axis: c, h, w -> 1, c, h, w.
    res = exec_net.infer(inputs={input_blob: [image]})
    res = res[out_blob]
    # Network output is 1*1*N*7; the 7 values are
    # image_id, label, conf, x_min, y_min, x_max, y_max (box values normalized).
    ih, iw, ic = src.shape
    for obj in res[0][0]:
        if obj[2] > 0.5:
            index = int(obj[1] - 1)  # class ids are offset by one (background)
            # Guard the lookup: a negative index would silently wrap around
            # and an oversized one would raise IndexError.
            if 0 <= index < len(labels):
                label = labels[index]
            else:
                label = str(int(obj[1]))
            xmin = int(obj[3] * iw)
            ymin = int(obj[4] * ih)
            xmax = int(obj[5] * iw)
            ymax = int(obj[6] * ih)
            print(label)
            cv.rectangle(src, (xmin, ymin), (xmax, ymax), (0, 255, 255), 1, 8)
            cv.putText(src, label + str(obj[2]), (xmin, ymin),
                       cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, 8)

    # 5. Visualization.
    cv.imshow("ssd object detection", src)
    cv.waitKey(0)
    cv.destroyAllWindows()


if __name__ == "__main__":
    ssd_demo()
二、视频检测
首先,我们知道SSD网络末端输出的是1*1*N*7的featureMap,这个东西,你可以查论文,也可以在openVINO安装目录下看到,如下图:
打开后:
下面是视频检测源码。可以看到,我这个垃圾处理器 i5-1135G7 也能跑到接近 60 帧,OpenVINO 给力啊。
import time  # used for per-frame timing

import cv2 as cv
import numpy as np
from openvino.inference_engine import IECore


def _load_ssd():
    """Read the SSD IR model and load it on the CPU.

    Returns:
        (exec_net, input_blob, out_blob, h, w): the executable network, the
        names of its first input and first output, and the input height/width.
    """
    ie = IECore()
    for device in ie.available_devices:
        print(device)
    model_xml = "ssdv2_graph.xml"
    model_bin = "ssdv2_graph.bin"
    net = ie.read_network(model=model_xml, weights=model_bin)
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))
    n, c, h, w = net.inputs[input_blob].shape
    print(n, c, h, w)  # input shape; it is also listed near the top of the .xml
    exec_net = ie.load_network(network=net, device_name="CPU")
    return exec_net, input_blob, out_blob, h, w


def _read_labels():
    """Return the class names from the COCO label file, one per line."""
    with open('object_detection_classes_coco.txt', encoding="utf-8") as f:
        return [line.strip() for line in f]


def _draw_detections(canvas, detections, labels):
    """Draw every detection with confidence above 0.5 onto ``canvas`` in place.

    Each row of ``detections`` holds
    image_id, label, conf, x_min, y_min, x_max, y_max (box values normalized).
    """
    ih, iw, ic = canvas.shape
    for obj in detections:
        if obj[2] > 0.5:
            index = int(obj[1] - 1)  # class ids are offset by one (background)
            # Guard the lookup so a bad class id cannot wrap around or raise.
            if 0 <= index < len(labels):
                label = labels[index]
            else:
                label = str(int(obj[1]))
            xmin = int(obj[3] * iw)
            ymin = int(obj[4] * ih)
            xmax = int(obj[5] * iw)
            ymax = int(obj[6] * ih)
            print(label)
            cv.rectangle(canvas, (xmin, ymin), (xmax, ymax), (0, 255, 255), 1, 8)
            cv.putText(canvas, label + str(obj[2]), (xmin, ymin),
                       cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, 8)


def ssd_image():
    """Run the SSD detector on ``d:/3.png`` and show the annotated image.

    Raises:
        FileNotFoundError: if the input image cannot be read.
    """
    exec_net, input_blob, out_blob, h, w = _load_ssd()
    labels = _read_labels()

    image_path = "d:/3.png"
    src = cv.imread(image_path)
    if src is None:
        # cv.imread returns None instead of raising on a missing file.
        raise FileNotFoundError("cannot read image: " + image_path)
    image = cv.resize(src, (w, h)).transpose(2, 0, 1)  # h, w, c -> c, h, w

    # Wrapping the image in a list adds the batch axis: c, h, w -> 1, c, h, w.
    res = exec_net.infer(inputs={input_blob: [image]})
    _draw_detections(src, res[out_blob][0][0], labels)

    cv.imshow("ssd object detection", src)
    cv.waitKey(0)
    cv.destroyAllWindows()


def ssd_video():
    """Run the SSD detector synchronously on every frame of ``1.mp4``.

    Each frame is annotated with its detections and with the inference FPS,
    then displayed; pressing ESC stops playback.
    """
    exec_net, input_blob, out_blob, h, w = _load_ssd()
    labels = _read_labels()

    cap = cv.VideoCapture("1.mp4")
    cv.namedWindow("ssd object detection", cv.WINDOW_NORMAL)
    try:
        while True:
            ret, frame = cap.read()
            if ret is not True:
                break
            image = cv.resize(frame, (w, h)).transpose(2, 0, 1)  # -> c, h, w

            # Time the inference call only (time.time() returns seconds).
            t1 = time.time()
            res = exec_net.infer(inputs={input_blob: [image]})
            delta_t = time.time() - t1
            # Clamp to avoid a ZeroDivisionError on coarse clocks.
            fps = 1 / max(delta_t, 1e-6)
            print("fps:", fps)

            _draw_detections(frame, res[out_blob][0][0], labels)
            # Draw the FPS once per frame, whether or not anything was detected.
            cv.putText(frame, "FPS: " + str(int(fps)), (20, 20),
                       cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, 8)

            cv.imshow("ssd object detection", frame)
            key = cv.waitKey(1)
            if key == 27:  # ESC
                break
    finally:
        cap.release()
        cv.destroyAllWindows()


if __name__ == "__main__":
    ssd_video()
三、异构处理
我理解就是多线程加速。针对上面的视频处理,我们新增了以下三点:
1、双核心处理,并行处理两帧
# Create the executable network
# exec_net = ie.load_network(network=net, device_name="CPU")
exec_net = ie.load_network(network=net, device_name="CPU", num_requests=2)
# Ids of the two infer requests; their roles are swapped after every frame.
curr_request_id = 0
next_request_id = 1
我们这里加载了两帧(frame,next_frame),便于后面数据交换
cap = cv.VideoCapture("1.mp4")
ret, frame = cap.read()
while True:
    # Read one frame ahead so that two frames are available per iteration.
    next_ret, next_frame = cap.read()
    if next_ret is not True:
        break
    # NOTE(review): this preprocesses `frame`, not `next_frame` - confirm which
    # frame the request submitted in this iteration is meant to process.
    image = cv.resize(frame, (w, h))  # resize
2、异步推理
# Submit the request without blocking on its result.
exec_net.start_async(request_id=next_request_id, inputs={input_blob: [image]})
3、交换数据(速度有待测试)
# Swap: the look-ahead frame becomes the current one, and the two request ids
# exchange roles.
frame = next_frame
curr_request_id, next_request_id = next_request_id, curr_request_id
def ssd_video_asyac():
    """Run the SSD detector on ``1.mp4`` with two overlapping infer requests.

    While the result of the current frame is being collected and drawn, the
    next frame has already been submitted on the other request.  Detections
    are always drawn on the frame they were computed from, and the last frame
    of the video is processed too.  Pressing ESC stops playback.
    """
    # 1. Read the model and create the executable network.
    ie = IECore()
    for device in ie.available_devices:
        print(device)
    model_xml = "ssdv2_graph.xml"
    model_bin = "ssdv2_graph.bin"
    net = ie.read_network(model=model_xml, weights=model_bin)
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))
    n, c, h, w = net.inputs[input_blob].shape
    print(n, c, h, w)  # input shape; it is also listed near the top of the .xml
    exec_net = ie.load_network(network=net, device_name="CPU", num_requests=2)
    curr_request_id = 0
    next_request_id = 1

    # 2. Read the class-name file (one label per line).
    with open('object_detection_classes_coco.txt', encoding="utf-8") as f:
        labels = [line.strip() for line in f]

    def to_blob(bgr):
        # Resize to the network input size and reorder h, w, c -> c, h, w.
        return cv.resize(bgr, (w, h)).transpose(2, 0, 1)

    # 3. Open the video and prime the pipeline with the first frame.
    cap = cv.VideoCapture("1.mp4")
    try:
        ret, frame = cap.read()
        if ret is not True:
            return
        cv.namedWindow("ssd object detection", cv.WINDOW_NORMAL)
        # Wrapping the image in a list adds the batch axis (batch size 1).
        exec_net.start_async(request_id=curr_request_id,
                             inputs={input_blob: [to_blob(frame)]})
        while True:
            t1 = time.time()  # seconds
            next_ret, next_frame = cap.read()
            if next_ret is True:
                # Submit the NEXT frame before waiting on the current one so
                # the two overlap.
                exec_net.start_async(request_id=next_request_id,
                                     inputs={input_blob: [to_blob(next_frame)]})

            # 4. Collect the result for the current frame (status 0 means OK).
            if exec_net.requests[curr_request_id].wait(-1) == 0:
                res = exec_net.requests[curr_request_id].output_blobs[out_blob].buffer
                # Output is 1*1*N*7; the 7 values are
                # image_id, label, conf, x_min, y_min, x_max, y_max (normalized).
                ih, iw, ic = frame.shape
                for obj in res[0][0]:
                    if obj[2] > 0.5:
                        index = int(obj[1] - 1)  # offset by one (background)
                        if 0 <= index < len(labels):
                            label = labels[index]
                        else:
                            label = str(int(obj[1]))
                        xmin = int(obj[3] * iw)
                        ymin = int(obj[4] * ih)
                        xmax = int(obj[5] * iw)
                        ymax = int(obj[6] * ih)
                        print(label)
                        cv.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 255), 1, 8)
                        cv.putText(frame, label + str(obj[2]), (xmin, ymin),
                                   cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, 8)

            # Per-frame throughput of the pipelined loop; clamp to avoid a
            # ZeroDivisionError on coarse clocks.
            delta_t = time.time() - t1
            print("fps:", 1 / max(delta_t, 1e-6))

            # 5. Visualization.
            cv.imshow("ssd object detection", frame)
            key = cv.waitKey(1)
            if key == 27 or next_ret is not True:  # ESC or end of video
                break
            # Swap: the look-ahead frame and its request become current.
            frame = next_frame
            curr_request_id, next_request_id = next_request_id, curr_request_id
    finally:
        cap.release()
        cv.destroyAllWindows()
四、异构人脸检测
模型下载,在博客最开始已经写好。
import time  # used for per-frame timing

import cv2 as cv
import numpy as np
from openvino.inference_engine import IECore


def ssd_video_asyac():
    """Run face-detection-0102 on ``1.mp4`` with two overlapping infer requests.

    While the result of the current frame is being collected and drawn, the
    next frame has already been submitted on the other request.  Detections
    are always drawn on the frame they were computed from, and the last frame
    of the video is processed too.  Pressing ESC stops playback.
    """
    # 1. Read the model and create the executable network.
    ie = IECore()
    for device in ie.available_devices:
        print(device)
    model_xml = "face-detection-0102.xml"
    model_bin = "face-detection-0102.bin"
    net = ie.read_network(model=model_xml, weights=model_bin)
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))
    n, c, h, w = net.inputs[input_blob].shape
    print(n, c, h, w)  # input shape; it is also listed near the top of the .xml
    exec_net = ie.load_network(network=net, device_name="CPU", num_requests=2)
    curr_request_id = 0
    next_request_id = 1

    # 2. Caption for detections.  This model only detects faces, so the COCO
    # class-name file is not used: indexing it would caption every face with
    # an unrelated COCO class name.
    label = "face"

    def to_blob(bgr):
        # Resize to the network input size and reorder h, w, c -> c, h, w.
        return cv.resize(bgr, (w, h)).transpose(2, 0, 1)

    # 3. Open the video and prime the pipeline with the first frame.
    cap = cv.VideoCapture("1.mp4")
    try:
        ret, frame = cap.read()
        if ret is not True:
            return
        cv.namedWindow("face detection", cv.WINDOW_NORMAL)
        # Wrapping the image in a list adds the batch axis (batch size 1).
        exec_net.start_async(request_id=curr_request_id,
                             inputs={input_blob: [to_blob(frame)]})
        while True:
            t1 = time.time()  # seconds
            next_ret, next_frame = cap.read()
            if next_ret is True:
                # Submit the NEXT frame before waiting on the current one so
                # the two overlap.
                exec_net.start_async(request_id=next_request_id,
                                     inputs={input_blob: [to_blob(next_frame)]})

            # 4. Collect the result for the current frame (status 0 means OK).
            if exec_net.requests[curr_request_id].wait(-1) == 0:
                res = exec_net.requests[curr_request_id].output_blobs[out_blob].buffer
                # Output is 1*1*N*7; the 7 values are
                # image_id, label, conf, x_min, y_min, x_max, y_max (normalized).
                ih, iw, ic = frame.shape
                for obj in res[0][0]:
                    if obj[2] > 0.5:
                        xmin = int(obj[3] * iw)
                        ymin = int(obj[4] * ih)
                        xmax = int(obj[5] * iw)
                        ymax = int(obj[6] * ih)
                        print(label)
                        cv.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 255), 1, 8)
                        cv.putText(frame, label + str(obj[2]), (xmin, ymin),
                                   cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, 8)

            # Time spent on this frame: read, submit, wait and draw.
            delta_t = time.time() - t1
            print("the cost time(ms):%.3f" % (1000 * delta_t))

            # 5. Visualization.
            cv.imshow("face detection", frame)
            key = cv.waitKey(1)
            if key == 27 or next_ret is not True:  # ESC or end of video
                break
            # Swap: the look-ahead frame and its request become current.
            frame = next_frame
            curr_request_id, next_request_id = next_request_id, curr_request_id
    finally:
        cap.release()
        cv.destroyAllWindows()


if __name__ == "__main__":
    ssd_video_asyac()
速度还是有点不可思议的。