basedoc/face__detection__yunet_2demo_8py_source.html

# This file is part of OpenCV Zoo project.

# It is subject to the license terms in the LICENSE file found in the same directory.

#

# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.

# Third party copyrights are property of their respective owners.


import argparse


import numpy as np

import cv2 as cv


from yunet import YuNet


def str2bool(v):

    if v.lower() in ['on', 'yes', 'true', 'y', 't']:

        return True

    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:

        return False

    else:

        raise NotImplementedError


backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]

targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]

help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"

help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"

try:

    backends += [cv.dnn.DNN_BACKEND_TIMVX]

    targets += [cv.dnn.DNN_TARGET_NPU]

    help_msg_backends += "; {:d}: TIMVX"

    help_msg_targets += "; {:d}: NPU"

except:

    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')


parser = argparse.ArgumentParser(description='YuNet: A Fast and Accurate CNN-based Face Detector (https://github.com/ShiqiYu/libfacedetection).')

parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')

parser.add_argument('--model', '-m', type=str, default='face_detection_yunet_2022mar.onnx', help='Path to the model.')

parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))

parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))

parser.add_argument('--conf_threshold', type=float, default=0.9, help='Filter out faces of confidence < conf_threshold.')

parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')

parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')

parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')

parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')

args = parser.parse_args()


def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):

    output = image.copy()

    landmark_color = [

        (255,   0,   0), # right eye

        (  0,   0, 255), # left eye

        (  0, 255,   0), # nose tip

        (255,   0, 255), # right mouth corner

        (  0, 255, 255)  # left mouth corner

    ]


    if fps is not None:

        cv.putText(output, 'FPS: {:.2f}'.format(fps), (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, text_color)


    for det in (results if results is not None else []):

        bbox = det[0:4].astype(np.int32)

        cv.rectangle(output, (bbox[0], bbox[1]), (bbox[0]+bbox[2], bbox[1]+bbox[3]), box_color, 2)


        conf = det[-1]

        cv.putText(output, '{:.4f}'.format(conf), (bbox[0], bbox[1]+12), cv.FONT_HERSHEY_DUPLEX, 0.5, text_color)


        landmarks = det[4:14].astype(np.int32).reshape((5,2))

        for idx, landmark in enumerate(landmarks):

            cv.circle(output, landmark, 2, landmark_color[idx], 2)


    return output


if __name__ == '__main__':

    # Instantiate YuNet

    model = YuNet(modelPath=args.model,

                  inputSize=[320, 320],

                  confThreshold=args.conf_threshold,

                  nmsThreshold=args.nms_threshold,

                  topK=args.top_k,

                  backendId=args.backend,

                  targetId=args.target)


    # If input is an image

    if args.input is not None:

        image = cv.imread(args.input)

        h, w, _ = image.shape


        # Inference

        model.setInputSize([w, h])

        results = model.infer(image)


        # Print results

        print('{} faces detected.'.format(results.shape[0]))

        for idx, det in enumerate(results):

            print('{}: {:.0f} {:.0f} {:.0f} {:.0f} {:.0f} {:.0f} {:.0f} {:.0f} {:.0f} {:.0f} {:.0f} {:.0f} {:.0f} {:.0f}'.format(

                idx, *det[:-1])

            )


        # Draw results on the input image

        image = visualize(image, results)


        # Save results if save is true

        if args.save:

            print('Resutls saved to result.jpg\n')

            cv.imwrite('result.jpg', image)


        # Visualize results in a new window

        if args.vis:

            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)

            cv.imshow(args.input, image)

            cv.waitKey(0)

    else: # Omit input to call default camera

        deviceId = 0

        cap = cv.VideoCapture(deviceId)

        w = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))

        h = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))

        model.setInputSize([w, h])


        tm = cv.TickMeter()

        while cv.waitKey(1) < 0:

            hasFrame, frame = cap.read()

            if not hasFrame:

                print('No frames grabbed!')

                break


            # Inference

            tm.start()

            results = model.infer(frame) # results is a tuple

            tm.stop()


            # Draw results on the input image

            frame = visualize(frame, results, fps=tm.getFPS())


            # Visualize results in a new Window

            cv.imshow('YuNet Demo', frame)


            tm.reset()


yunet.YuNet
Definition yunet.py:12

demo.visualize
visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps=None)
Definition demo.py:46

demo.str2bool
str2bool
Definition demo.py:43

demo.int
int
Definition demo.py:37