完善资料让更多小伙伴认识你,还能领取20积分哦, 立即完善>
扫一扫,分享给好友
一、搭建开发环境
sudo apt-get install -y cmake gcc gcc-c++ protobuf-devel protobuf-compiler lapack-devel opencv-devel sudo apt-get install -y python3-devel python3-opencv python3-numpy-f2py python3-scipy python3-h5py python3-lmdb python3-grpcio # 切换到python3.x sudo update-alternatives --install /usr/bin/python python /usr/bin/python3 150 # 从社区下载rknn-toolkit-0.98 pip3 install --user -r rknn-toolkit/package/requirements-cpu.txt(需要将requirements-cpu.txt中的tensorflow删除掉,目前我们单独安装tensorflow aarch64的whl包)
# pip3 install --user onnx-1.4.1-cp36-cp36m-linux_aarch64.whl pip3 install onnx # 最新版本的轮子包
cd rknn-toolkit/example/mobilenet_v1 python3 test.py
sudo apt-get install freetype-devel pkg-config libpng-devel pkg-config pip3 install --user matplotlib
sudo apt-get clean all sudo apt-get update sudo apt-get install gstreamer-rockchip 若安装遇到报错,请视报错信息安装对应的软件包。 二、基于RKNN开发 2.1 运行yolov3 rknn模型 解压到PC Linux系统上(Windows, Mac不支持预编译)用于转化模型(也可以在板子上转模型,但不推荐,速度慢且不支持预编译) PC端操作 进入yolov3_demo目录,并从darknet官网下载权重(有自己模型就不需要下载了, 拷贝你自己训练的.weight文件和yolov3-tiny-modify.cfg文件到rk3399pro开发板) cd yolov3_demo # 如果有自己的模型,下面两个下载模型的命令就不需要操作了 wget https://pjreddie.com/media/files/yolov3.weights wget https://pjreddie.com/media/files/yolov3-tiny.weights
# python3 rknn_transform_320x320.py python3 rknn_transform_416x416.py # 本人使用的是416X416的输入,根据你自己的输入修改 # python3 rknn_transform_608x608.py # python3 rknn_transform_tiny.py 有用户反馈,使用rknn-toolkit1.0.0及以上版本,跑yolov3模型会卡在init_runtime,这是由于原脚本没有开启预编译导致的,现已修改转化脚本默认开启预编译,开启预编译后得到的模型不能在PC模拟器上运行,只能在板子上运行。 开发板上操作 在PC上转模型成功后,拷贝PC上的yolov3_demo和模型到开发板,在开发板操作以下步骤 运行python接口的demo
# python3 rknn_picture_320x320.py python3 rknn_picture_416x416.py # 本人使用的此模型 # python3 rknn_picture_608x608.py # python3 rknn_picture_tiny.py
# python3 rknn_camera_320x320.py python3 rknn_camera_416x416.py # 本人使用的是416X416的输入,根据你自己的输入修改 # python3 rknn_camera_608x608.py # python3 rknn_camera_tiny.py 运行C接口的demo 除了python接口的demo外,还提供了c接口的demo,该demo基于论坛开发者分享修改而来,感谢该名开发者
cd yolov3_demo/C-yolov3/build cmake .. make
./yolov3-320 c 0 ./yolov3-416 c 0 ./yolov3-608 c 0 ./yolov3-tiny c 0
./yolov3-320 v ../../video/3.mp4 ./yolov3-416 v ../../video/3.mp4 ./yolov3-608 v ../../video/3.mp4 ./yolov3-tiny v ../../video/3.mp4 2.2 部分代码展示 rknn_resize.py import glob import os from PIL import Image if __name__ == "__main__": images_path = glob.glob(os.path.join("data/", '*.jpg')) #print(images_path) for filename in images_path: temp = filename.find("_320x320") if temp >= 0: continue temp = filename.find("_416x416") if temp >= 0: continue temp = filename.find("_608x608") if temp >= 0: continue print(filename) prefix = filename.split('.jpg')[0] #print(prefix) img = Image.open(filename) im = img.resize((320, 320), Image.ANTIALIAS) im.save(prefix + "_320x320.jpg") im = img.resize((416, 416), Image.ANTIALIAS) im.save(prefix + "_416x416.jpg") im = img.resize((608, 608), Image.ANTIALIAS) im.save(prefix + "_608x608.jpg") rknn_transform.py from PIL import Image import numpy as np #from matplotlib import pyplot as plt import re import math import random from rknn.api import RKNN if __name__ == '__main__': # Create RKNN object rknn = RKNN() # Load tensorflow model print('--> Loading model') # rknn.load_darknet(model='./yolov3-tiny.cfg', weight="./yolov3-tiny.weights") rknn.load_darknet(model='./yolov3.cfg', weight="./yolov3.weights") print('done') # rknn.config(channel_mean_value='0 0 0 255', reorder_channel='0 1 2') rknn.config(channel_mean_value='0 0 0 255', reorder_channel='0 1 2', batch_size=1) # Build model print('--> Building model') rknn.build(do_quantization=True, dataset='./dataset_416x416.txt') print('done') # rknn.export_rknn('./yolov3_tiny.rknn') rknn.export_rknn('./yolov3.rknn') exit(0) rknn_picture.py 主函数 if __name__ == '__main__': rknn = load_model() im_file = './data/dog.jpg' # 注意修改为自己需要测试的图片 im = Image.open(im_file) im = im.resize((416, 416)) mat = np.asarray(im.convert('RGB')) out_boxes, out_boxes2, out_boxes3 = rknn.inference(inputs=[mat]) out_boxes = out_boxes.reshape(SPAN, LISTSIZE, GRID0, GRID0) out_boxes2 = out_boxes2.reshape(SPAN, LISTSIZE, GRID1, 
GRID1) out_boxes3 = out_boxes3.reshape(SPAN, LISTSIZE, GRID2, GRID2) input_data = [] input_data.append(np.transpose(out_boxes, (2, 3, 0, 1))) input_data.append(np.transpose(out_boxes2, (2, 3, 0, 1))) input_data.append(np.transpose(out_boxes3, (2, 3, 0, 1))) boxes, classes, scores = yolov3_post_process(input_data, acnhors) image = cv2.imread(im_file) if boxes is not None: draw(image, boxes, scores, classes) cv2.imshow("results",image) cv2.waitKey(0) cv2.destroyAllWindows() rknn.release() rknn_camera.py if __name__ == '__main__': rknn = load_model() font = cv2.FONT_HERSHEY_SIMPLEX; #capture = cv2.VideoCapture("data/3.mp4") capture = cv2.VideoCapture(0) accum_time = 0 curr_fps = 0 prev_time = timer() fps = "FPS: ??" try: while(True): ret, frame = capture.read() if ret == True: image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = cv2.resize(image, (416, 416)) # 修改为自己的尺寸 testtime=timer() out_boxes, out_boxes2, out_boxes3 = rknn.inference(inputs=[image]) testtime2=timer() print("rknn use time {}", testtime2-testtime) out_boxes = out_boxes.reshape(SPAN, LISTSIZE, GRID0, GRID0) out_boxes2 = out_boxes2.reshape(SPAN, LISTSIZE, GRID1, GRID1) out_boxes3 = out_boxes3.reshape(SPAN, LISTSIZE, GRID2, GRID2) input_data = [] input_data.append(np.transpose(out_boxes, (2, 3, 0, 1))) input_data.append(np.transpose(out_boxes2, (2, 3, 0, 1))) input_data.append(np.transpose(out_boxes3, (2, 3, 0, 1))) testtime=timer() boxes, classes, scores = yolov3_post_process(input_data) testtime2=timer() print("process use time: {}", testtime2-testtime) testtime=timer() if boxes is not None: draw(frame, boxes, scores, classes) curr_time = timer() exec_time = curr_time - prev_time prev_time = curr_time accum_time += exec_time curr_fps += 1 if accum_time > 1: accum_time -= 1 fps = "FPS: " + str(curr_fps) curr_fps = 0 cv2.putText(frame, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.50, color=(255, 0, 0), thickness=2) cv2.imshow("results", frame) c = cv2.waitKey(5) & 0xff if c == 
27: cv2.destroyAllWindows() capture.release() rknn.release() break; testtime2=timer() print("show image use time: {}", testtime2-testtime) except KeyboardInterrupt: cv2.destroyAllWindows() capture.release() rknn.release() |
|
|
|
三、优化-多任务
在做多任务优化之前,大家可以先学习一下进程、线程、协程等实现多任务的方式。 多进程 多进程简说 Queue 进程间通信:进程之间不共享变量,Queue 用来在多个进程间传递数据。 Queue 最常用的两个方法是 get 和 put。
python多进程共享变量Value的使用tips multiprocessing.Value(“d”, 0)使用扩展 在根据网上资料使用Value时,由于共享的是字符串,但网上介绍的都是整数或者字符,于是遇到了很多阻碍,通过查询官方文档得出了解决方案。 对于共享整数或者单个字符,初始化比较简单 import multiprocessing num = multiprocessing.Value("d", 10.0) # double型 import multiprocessing from ctypes import c_char_p num = multiprocessing.Value(c_char_p, b'ss') # 字符串型 多进程来加速rknn 主要优化视频文件预测,只写相比rknn_camera.py差别的函数。 函数代码如下: 1. 读取视频部分,前处理 video_capture函数代码: def video_capture(q_frame:Queue, q_image:Queue, flag): video = cv2.VideoCapture(0) print("video.isOpened()={}", video.isOpened()) try: while True: if flag.value == 20: if video.isOpened(): video.release() print("video release!") print("exit video_capture!") break s = time.time() ret, frame = video.read() assert ret, 'read video frame failed.' #print('capture read used {} ms.'.format((time.time() - s) * 1000)) s = time.time() image = cv2.resize(frame, (416, 416)) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) #print('capture resize used {} ms.'.format((time.time() - s) * 1000)) s = time.time() if q_frame.empty(): q_frame.put(frame) if q_image.full(): continue else: q_image.put(image) #print("capture put to queue used {} ms".format((time.time()-s)*1000)) except KeyboardInterrupt: video.release() print("exit video_capture!") 2. rknn推理部分 infer_rknn函数 def infer_rknn(q_image:Queue, q_infer:Queue, flag): rknn = load_model() try: while True: if flag.value == 10: print("befor exit infer rknn") rknn.release() print("exit infer_rknn!") flag.value = 20 break s = time.time() if q_image.empty(): continue else: image = q_image.get() #print('Infer get, used time {} ms. 
'.format((time.time() - s) * 1000)) s = time.time() out_boxes, out_boxes2 = rknn.inference(inputs=[image]) out_boxes = out_boxes.reshape(SPAN, LISTSIZE, GRID0, GRID0) out_boxes2 = out_boxes2.reshape(SPAN, LISTSIZE, GRID1, GRID1) out_boxes3 = out_boxes3.reshape(SPAN, LISTSIZE, GRID2, GRID2) input_data = [] input_data.append(np.transpose(out_boxes, (2, 3, 0, 1))) input_data.append(np.transpose(out_boxes2, (2, 3, 0, 1))) input_data.append(np.transpose(out_boxes3, (2, 3, 0, 1))) #print('Infer done, used time {} ms. '.format((time.time() - s) * 1000)) s = time.time() if q_infer.full(): continue else: q_infer.put(input_data) #print('Infer put, used time {} ms. '.format((time.time() - s) * 1000)) except KeyboardInterrupt: print("befor exit infer rknn") rknn.release() print("exit infer_rknn!") 3. 后处理部分 yolov3_post_process函数 def yolov3_post_process(input_data, anchors): len_Anchors = int(len(anchors)/3) # yolov3 if len_Anchors == 3: masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] else: # yolov3-tiny masks = [[3, 4, 5], [0, 1, 2]] boxes, classes, scores = [], [], [] for input,mask in zip(input_data, masks): b, c, s = process(input, mask, anchors) b, c, s = filter_boxes(b, c, s) boxes.append(b) classes.append(c) scores.append(s) boxes = np.concatenate(boxes) classes = np.concatenate(classes) scores = np.concatenate(scores) # # Scale boxes back to original image shape. 
# width, height = 416, 416 #shape[1], shape[0] # image_dims = [width, height, width, height] # boxes = boxes * image_dims nboxes, nclasses, nscores = [], [], [] for c in set(classes): inds = np.where(classes == c) b = boxes[inds] c = classes[inds] s = scores[inds] keep = nms_boxes(b, s) nboxes.append(b[keep]) nclasses.append(c[keep]) nscores.append(s[keep]) if not nclasses and not nscores: return None, None, None boxes = np.concatenate(nboxes) classes = np.concatenate(nclasses) scores = np.concatenate(nscores) return boxes, classes, scores post_process函数 def post_process(q_infer, q_objs, flag, anchors): while True: if flag.value == 20: break s = time.time() if q_infer.empty(): continue else: input_data = q_infer.get() #print('Post process get, used time {} ms. '.format((time.time() - s) * 1000)) s = time.time() boxes, classes, scores = yolov3_post_process(input_data, anchors) #print('Post process done, used time {} ms. '.format((time.time() - s) * 1000)) s = time.time() if q_objs.full(): continue else: q_objs.put((boxes, classes, scores)) #print('Post process put, used time {} ms. '.format((time.time()-s)*1000)) 4. 
多进程预测视频的主函数 rknn_camera_multiProcess.py的主函数 main函数 def main(): # log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.DEBUG) q_frame = Queue(maxsize=1) q_image = Queue(maxsize=3) q_infer = Queue(maxsize=3) q_objs = Queue(maxsize=3) flag = multiprocessing.Value("d", 0) p_cap1 = Process(target=video_capture, args=(q_frame, q_image, flag)) # p_cap2 = Process(target=video_capture, args=(q_frame, q_image, flag)) p_infer1 = Process(target=infer_rknn, args=(q_image, q_infer, flag)) p_infer2 = Process(target=infer_rknn, args=(q_image, q_infer, flag)) p_post1 = Process(target=post_process, args=(q_infer, q_objs, flag)) p_post2 = Process(target=post_process, args=(q_infer, q_objs, flag)) p_cap1.start() # p_cap2.start() p_infer1.start() p_infer2.start() p_post1.start() p_post2.start() fps = 0 l_used_time = [] try: while True: s = time.time() frame = q_frame.get() boxes, classes, scores = q_objs.get() # print('main func, get objs use {} ms. '.format((time.time() - s) * 1000)) if boxes is not None: draw(frame, boxes, scores, classes) cv2.putText(frame, text='FPS: {}'.format(fps), org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.50, color=(255, 0, 0), thickness=2) cv2.imshow("results", frame) c = cv2.waitKey(5) & 0xff if c == 27: flag.value = 10 time.sleep(5) cv2.destroyAllWindows() print("ESC, exit main!") break used_time = time.time() - s l_used_time.append(used_time) if len(l_used_time) > 20: l_used_time.pop(0) fps = int(1 / np.mean(l_used_time)) # print('main func, used time {} ms. '.format(used_time*1000)) except KeyboardInterrupt: time.sleep(5) cv2.destroyAllWindows() print("ctrl + c, exit main!") p_cap1.terminate() # p_cap2.terminate() p_infer1.terminate() p_infer2.terminate() p_post1.terminate() p_post2.terminate() sys.exit() if __name__ == '__main__': main() 多线程
前处理开一个线程,NPU推理开第二个线程,后处理开第三个线程,帧率至少可以提高到30帧;相比之下单线程只有14帧左右,多线程明显更快。 四、yolov3理论分析(结合代码) 1. 物体检测 物体检测
YOLO算法的基本思想是:首先将输入图像分成SxS个格子,如果某个预测对象的中心坐标落在某个格子中,那么就由该格子来预测该对象,每个格子都会预测B个边界框,每个边界框输出为(5+C)长度参数 SxS: 在yolov3-416中会切割成13x13, 26x26, 52x52; 计算方式:yolo1 = 输入尺寸(416)/ 缩放数(32); # 13 yolo2 = 2*yolo1; # 26 yolo3 = 4*yolo1; # 52 B: 在yolov3中该值为3, 由anchors决定 C: 模型可预测分类, VOC数据集为20,coco数据集为80,也可以自己训练,本人是2类 5: bounding box边界框的中心坐标,长宽,对象置信度 |
|
|
|
3. YOLOV3神经网络图
yolov3-416: 输入参数416x416x3, 输出3个数组13x13x(3x(5+N)) 26x26x(3x(5+N)) 52x52x(3x(5+N)) yolov3-608: 输入参数608x608x3, 输出3个数组19x19x(3x(5+N)) 38x38x(3x(5+N)) 76x76x(3x(5+N)) tiny-yolov3: 输入参数416x416x3, 输出2个数组13x13x(3x(5+N)) 26x26x(3x(5+N)) 论文N=80;N为检测的类别数目 4. yolov3后处理 4.1. 对数空间变换 4.1.1对数空间变换函数: $b_x = \sigma(t_x) + c_x$, $b_y = \sigma(t_y) + c_y$, $b_w = p_w e^{t_w}$, $b_h = p_h e^{t_h}$ 其中 tx, ty, tw, th为模型输出 cx, cy是物体中心所在格子索引 σ()是sigmoid函数,变量映射到(0, 1) pw, ph为对应anchors box的宽高 4.1.2程序代码: yolov3后处理程序-对数空间转换函数 def process(input, mask, anchors): anchors = [anchors[i] for i in mask] grid_h, grid_w = map(int, input.shape[0:2]) box_confidence = input[..., 4] obj_thresh = -np.log(1/OBJ_THRESH - 1) pos = np.where(box_confidence > obj_thresh) input = input[pos] box_confidence = sigmoid(input[..., 4]) box_confidence = np.expand_dims(box_confidence, axis=-1) box_class_probs = sigmoid(input[..., 5:]) box_xy = sigmoid(input[..., :2]) box_wh = np.exp(input[..., 2:4]) for idx, val in enumerate(pos[2]): box_wh[idx] = box_wh[idx] * anchors[pos[2][idx]] pos0 = np.array(pos[0])[:, np.newaxis] pos1 = np.array(pos[1])[:, np.newaxis] grid = np.concatenate((pos1, pos0), axis=1) box_xy += grid box_xy /= (grid_w, grid_h) box_wh /= (416, 416) # 此处还可以优化,以传参数的方式,代码就写活了 box_xy -= (box_wh / 2.) box = np.concatenate((box_xy, box_wh), axis=-1) return box, box_confidence, box_class_probs 4.2. 阈值过滤 基于对象置信度的阈值,首先,我们根据对象分数过滤框。 通常,具有低于阈值分数的框被忽略。 程序代码 yolov3后处理程序-阈值过滤 def filter_boxes(boxes, box_confidences, box_class_probs): """Filter boxes with object threshold. # Arguments boxes: ndarray, boxes of objects. box_confidences: ndarray, confidences of objects. box_class_probs: ndarray, class_probs of objects. # Returns boxes: ndarray, filtered boxes. classes: ndarray, classes for boxes. scores: ndarray, scores for boxes. 
""" box_scores = box_confidences * box_class_probs box_classes = np.argmax(box_scores, axis=-1) box_class_scores = np.max(box_scores, axis=-1) pos = np.where(box_class_scores >= OBJ_THRESH) # 注意点 boxes = boxes[pos] classes = box_classes[pos] scores = box_class_scores[pos] return boxes, classes, scores 4.3. 非最大抑制NMS 4.3.1 原理讲解 NMS用于解决同一图像的多重检测问题。 假设有6个候选矩形框,按概率从小到大排序依次为A、B、C、D、E、F 1. 从最大概率矩形框F开始,分别判断A~E与F的重叠度IOU是否大于某个设定的阈值; 2. 假设B、D与F的重叠度超过阈值,那么就扔掉B、D;并标记第一个矩形框F,是我们保留下来的 3. 从剩下的矩形框A、C、E中,选择概率最大的E,然后判断E与A、C的重叠度,重叠度大于一定的阈值,那么就扔掉;并标记E是我们保留下来的第二个矩形框。 4.3.2 程序代码 yolov3后处理程序-非最大抑制NMS def nms_boxes(boxes, scores): """Suppress non-maximal boxes. # Arguments boxes: ndarray, boxes of objects. scores: ndarray, scores of objects. # Returns keep: ndarray, index of effective boxes. """ x = boxes[:, 0] y = boxes[:, 1] w = boxes[:, 2] h = boxes[:, 3] areas = w * h order = scores.argsort()[::-1] keep = [] while order.size > 0: i = order[0] keep.append(i) xx1 = np.maximum(x[i], x[order[1:]]) yy1 = np.maximum(y[i], y[order[1:]]) xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]]) yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]]) w1 = np.maximum(0.0, xx2 - xx1 + 0.00001) h1 = np.maximum(0.0, yy2 - yy1 + 0.00001) inter = w1 * h1 ovr = inter / (areas[i] + areas[order[1:]] - inter) inds = np.where(ovr <= NMS_THRESH)[0] order = order[inds + 1] keep = np.array(keep) return keep 5. Yolov3速度对比 总结 后续将继续优化代码,敬请等待。欢迎小伙伴们提出建议,将优化做到更好。欢迎加入交流学习。 |
|
|
|
你正在撰写答案
如果你是对答案或其他答案精选点评或询问,请使用“评论”功能。
基于米尔瑞芯微RK3576核心板/开发板的人脸疲劳检测应用方案
1760 浏览 0 评论
2096 浏览 1 评论
1771 浏览 1 评论
3106 浏览 1 评论
4025 浏览 1 评论
小黑屋| 手机版| Archiver| 电子发烧友 ( 湘ICP备2023018690号 )
GMT+8, 2025-1-11 09:29 , Processed in 0.562298 second(s), Total 74, Slave 58 queries .
Powered by 电子发烧友网
© 2015 bbs.elecfans.com
关注我们的微信
下载发烧友APP
电子发烧友观察
版权所有 © 湖南华秋数字科技有限公司
电子发烧友 (电路图) 湘公网安备 43011202000918 号 电信与信息服务业务经营许可证:合字B2-20210191 工商网监 湘ICP备2023018690号