add vitpose_wholebody #9284

Open · wants to merge 4 commits into base: develop
11 changes: 6 additions & 5 deletions deploy/python/det_keypoint_unite_infer.py
@@ -31,7 +31,8 @@

KEYPOINT_SUPPORT_MODELS = {
    'HigherHRNet': 'keypoint_bottomup',
-   'HRNet': 'keypoint_topdown'
+   'HRNet': 'keypoint_topdown',
+   'VitPose_TopDown_WholeBody': 'keypoint_topdown_wholebody'
}


@@ -177,9 +178,10 @@ def topdown_unite_predict_video(detector,
                    current_keypoints)

                keypoint_res['keypoint'][0][0] = smooth_keypoints.tolist()

+           zero = np.zeros((height, width, 3), dtype=np.uint8)
            im = visualize_pose(
-               frame,
+               zero,
                keypoint_res,
                visual_thresh=FLAGS.keypoint_threshold,
                returnimg=True)
@@ -329,8 +331,7 @@ def main():
enable_mkldnn=FLAGS.enable_mkldnn,
use_dark=FLAGS.use_dark)
    keypoint_arch = topdown_keypoint_detector.pred_config.arch
-   assert KEYPOINT_SUPPORT_MODELS[
-       keypoint_arch] == 'keypoint_topdown', 'Detection-Keypoint unite inference only supports topdown models.'
+   assert KEYPOINT_SUPPORT_MODELS[keypoint_arch] in (
+       'keypoint_topdown', 'keypoint_topdown_wholebody'
+   ), 'Detection-Keypoint unite inference only supports topdown models.'

# predict from video file or camera video stream
if FLAGS.video_file is not None or FLAGS.camera_id != -1:
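Two behaviour changes in this file: the gate in main() now admits both plain top-down and wholebody top-down models, and the video branch draws the skeleton on a black canvas (zero) instead of the original frame, so the rendered output shows the pose only. A minimal, self-contained sketch of the dispatch logic (the arch names are the ones registered above; the helper name is illustrative):

    KEYPOINT_SUPPORT_MODELS = {
        'HigherHRNet': 'keypoint_bottomup',
        'HRNet': 'keypoint_topdown',
        'VitPose_TopDown_WholeBody': 'keypoint_topdown_wholebody',
    }

    def check_unite_support(keypoint_arch):
        # unite (detector + keypoint) inference feeds person crops to the
        # keypoint model, so only top-down style models are usable
        category = KEYPOINT_SUPPORT_MODELS[keypoint_arch]
        assert category in ('keypoint_topdown', 'keypoint_topdown_wholebody'), \
            'Detection-Keypoint unite inference only supports topdown models.'

    check_unite_support('VitPose_TopDown_WholeBody')  # passes
    # check_unite_support('HigherHRNet')  # would raise AssertionError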
2 changes: 1 addition & 1 deletion deploy/python/infer.py
@@ -34,7 +34,7 @@
from benchmark_utils import PaddleInferBenchmark
from picodet_postprocess import PicoDetPostProcess
from preprocess import preprocess, Resize, NormalizeImage, Permute, PadStride, LetterBoxResize, WarpAffine, Pad, decode_image, CULaneResize
-from keypoint_preprocess import EvalAffine, TopDownEvalAffine, expand_crop
+from keypoint_preprocess import EvalAffine, TopDownEvalAffine, TopDownAffineImage, expand_crop
from clrnet_postprocess import CLRNetPostProcess
from visualize import visualize_box_mask, imshow_lanes
from utils import argsparser, Timer, get_current_memory_mb, multiclass_nms, coco_clsid2catid
49 changes: 48 additions & 1 deletion deploy/python/keypoint_infer.py
@@ -42,10 +42,40 @@
# Global dictionary
KEYPOINT_SUPPORT_MODELS = {
    'HigherHRNet': 'keypoint_bottomup',
-   'HRNet': 'keypoint_topdown'
+   'HRNet': 'keypoint_topdown',
+   'VitPose_TopDown_WholeBody': 'keypoint_topdown_wholebody'
}


def _box2cs(image_size, box):
    """Encode a bbox (x, y, w, h) into a (center, scale) pair.

    Args:
        image_size (list): [w, h] of the model input.
        box (list): [x, y, w, h] of the region to encode.

    Returns:
        tuple: A tuple containing center and scale.

        - np.ndarray[float32](2,): Center of the bbox (x, y).
        - np.ndarray[float32](2,): Scale of the bbox w & h.
    """

    x, y, w, h = box[:4]
    input_size = image_size
    aspect_ratio = input_size[0] / input_size[1]
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)

    # pad the box to the input aspect ratio before scaling
    if w > aspect_ratio * h:
        h = w * 1.0 / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio

    # pixel std is 200.0
    scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
    scale = scale * 1.25

    return center, scale

class KeyPointDetector(Detector):
"""
Args:
@@ -137,6 +167,23 @@ def postprocess(self, inputs, result):
            imshape = inputs['im_shape'][:, ::-1]
            center = np.round(imshape / 2.)
            scale = imshape / 200.
            keypoint_postprocess = HRNetPostProcess(use_dark=self.use_dark)
            kpts, scores = keypoint_postprocess(np_heatmap, center, scale)
            results['keypoint'] = kpts
            results['score'] = scores
            return results
        elif KEYPOINT_SUPPORT_MODELS[
                self.pred_config.arch] == 'keypoint_topdown_wholebody':
            results = {}
            imshape = inputs['im_shape'][:, ::-1]
            center = []
            scale = []
            for i in range(len(inputs['im_shape'])):
                # inputs['image'] is laid out (N, C, H, W); encode each full
                # image as a (center, scale) pair for the heatmap postprocess
                transize = np.shape(inputs["image"])
                tmp_center, tmp_scale = _box2cs(
                    [transize[-1], transize[-2]],
                    [0, 0, inputs['im_shape'][i][1], inputs['im_shape'][i][0]])
                center.append(tmp_center)
                scale.append(tmp_scale)

            keypoint_postprocess = HRNetPostProcess(use_dark=self.use_dark)
            kpts, scores = keypoint_postprocess(np_heatmap, center, scale)
            results['keypoint'] = kpts
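A quick worked example of the _box2cs encoding used by the new wholebody branch. This is a sketch with made-up numbers — a 288x384 (w, h) model input and a 640x480 frame — using the _box2cs function defined above:

    import numpy as np

    # encode the full 640x480 frame against a 288x384 (w, h) input
    center, scale = _box2cs([288, 384], [0, 0, 640, 480])
    print(center)  # [320. 240.]
    print(scale)   # approximately [4.0, 5.33]

The box is first padded from 640x480 to 640x853.3 to match the 0.75 input aspect ratio, then divided by the 200-pixel std and enlarged by 1.25 (640 / 200 * 1.25 = 4.0, 853.3 / 200 * 1.25 ≈ 5.33). HRNetPostProcess then uses these (center, scale) pairs to map heatmap peaks back to original image coordinates.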
77 changes: 77 additions & 0 deletions deploy/python/keypoint_preprocess.py
@@ -18,6 +18,83 @@
import numpy as np


def _box2cs(image_size, box):
    """Encode a bbox (x, y, w, h) into a (center, scale) pair.

    Args:
        image_size (list): [w, h] of the model input.
        box (list): [x, y, w, h] of the region to encode.

    Returns:
        tuple: A tuple containing center and scale.

        - np.ndarray[float32](2,): Center of the bbox (x, y).
        - np.ndarray[float32](2,): Scale of the bbox w & h.
    """

    x, y, w, h = box[:4]
    input_size = image_size
    aspect_ratio = input_size[0] / input_size[1]
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)

    # pad the box to the input aspect ratio before scaling
    if w > aspect_ratio * h:
        h = w * 1.0 / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio

    # pixel std is 200.0
    scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
    scale = scale * 1.25

    return center, scale

class TopDownAffineImage(object):
    """Apply an affine transform that warps the image to the train size.

    Args:
        trainsize (list): [w, h], the standard size used to train.
        use_udp (bool): whether to use Unbiased Data Processing.
        use_box2cs (bool): whether to derive (center, scale) from the
            full-image box via _box2cs instead of reading them from im_info.

    Returns:
        tuple: the warped image and the (unchanged) im_info dict.
    """

    def __init__(self, trainsize, use_udp=False, use_box2cs=True):
        self.trainsize = trainsize
        self.use_udp = use_udp
        self.use_box2cs = use_box2cs

    def __call__(self, records, im_info):
        if self.use_box2cs:
            center, scale = _box2cs(
                self.trainsize,
                [0, 0, im_info['im_shape'][1], im_info['im_shape'][0]])
        else:
            imshape = im_info['im_shape'][::-1]
            center = im_info['center'] if 'center' in im_info else imshape / 2.
            scale = im_info['scale'] if 'scale' in im_info else imshape

        # records is the image array at deploy time, so the rotation flag
        # (if any) lives in im_info, and there are no joints to warp here
        image = records
        rot = im_info['rotate'] if 'rotate' in im_info else 0
        if self.use_udp:
            trans = get_warp_matrix(
                rot, center * 2.0,
                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0],
                scale * 200.0)
            image = cv2.warpAffine(
                image,
                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
                flags=cv2.INTER_LINEAR)
        else:
            trans = get_affine_transform(center, scale * 200, rot,
                                         self.trainsize)
            image = cv2.warpAffine(
                image,
                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
                flags=cv2.INTER_LINEAR)
        return image, im_info


class EvalAffine(object):
def __init__(self, size, stride=64):
super(EvalAffine, self).__init__()
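A usage sketch for the new op, assuming the module's existing get_affine_transform helper; the 288x384 train size and the frame are dummies:

    import numpy as np
    from keypoint_preprocess import TopDownAffineImage

    op = TopDownAffineImage(trainsize=[288, 384], use_udp=False)
    image = np.zeros((480, 640, 3), dtype=np.uint8)    # dummy BGR frame
    im_info = {'im_shape': np.array([480., 640.])}     # [h, w]
    warped, im_info = op(image, im_info)
    print(warped.shape)  # (384, 288, 3) -- cv2.warpAffine dsize is (w, h)

With use_box2cs left at its default True, the whole frame is fitted into the train size through the same (center, scale) convention that the postprocess later inverts.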
25 changes: 16 additions & 9 deletions deploy/python/visualize.py
@@ -20,6 +20,7 @@
import numpy as np
import PIL
from PIL import Image, ImageDraw, ImageFile

ImageFile.LOAD_TRUNCATED_IMAGES = True

def imagedraw_textsize_c(draw, text):
@@ -234,14 +235,14 @@ def get_color(idx):
color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
return color


def visualize_pose(imgfile,
                   results,
                   visual_thresh=0.6,
                   save_name='pose.jpg',
                   save_dir='output',
                   returnimg=False,
-                  ids=None):
+                  ids=None,
+                  draw_box=False):
try:
import matplotlib.pyplot as plt
import matplotlib
@@ -252,30 +253,36 @@ def visualize_pose(imgfile,
raise e
skeletons, scores = results['keypoint']
skeletons = np.array(skeletons)
    # default to 17 (COCO) so an empty detection result keeps working;
    # otherwise the keypoint count comes from the skeletons themselves
    kpt_nums = 17
    if len(skeletons) > 0:
        kpt_nums = skeletons.shape[1]
    if kpt_nums == 17:  # plot coco keypoints
        EDGES = [(0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 6), (5, 7),
                 (6, 8), (7, 9), (8, 10), (5, 11), (6, 12), (11, 13),
                 (12, 14), (13, 15), (14, 16), (11, 12)]
    elif kpt_nums == 133:  # plot coco wholebody keypoints
        EDGES = [(15, 13), (13, 11), (16, 14), (14, 12), (11, 12), (5, 11),
                 (6, 12), (5, 6), (5, 7), (6, 8), (7, 9), (8, 10), (1, 2),
                 (0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 6), (15, 17),
                 (15, 18), (15, 19), (16, 20), (16, 21), (16, 22), (91, 92),
                 (92, 93), (93, 94), (94, 95), (91, 96), (96, 97), (97, 98),
                 (98, 99), (91, 100), (100, 101), (101, 102), (102, 103),
                 (91, 104), (104, 105), (105, 106), (106, 107), (91, 108),
                 (108, 109), (109, 110), (110, 111), (112, 113), (113, 114),
                 (114, 115), (115, 116), (112, 117), (117, 118), (118, 119),
                 (119, 120), (112, 121), (121, 122), (122, 123), (123, 124),
                 (112, 125), (125, 126), (126, 127), (127, 128), (112, 129),
                 (129, 130), (130, 131), (131, 132)]
    else:  # plot mpii keypoints
        EDGES = [(0, 1), (1, 2), (3, 4), (4, 5), (2, 6), (3, 6), (6, 7),
                 (7, 8), (8, 9), (10, 11), (11, 12), (13, 14), (14, 15),
                 (8, 12), (8, 13)]
NUM_EDGES = len(EDGES)

-   colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
-             [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
-             [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
if kpt_nums == 133:
colors = [(51, 153, 255), (51, 153, 255), (51, 153, 255), (51, 153, 255), (51, 153, 255), (0, 255, 0), (255, 128, 0), (0, 255, 0), (255, 128, 0), (0, 255, 0), (255, 128, 0), (0, 255, 0), (255, 128, 0), (0, 255, 0), (255, 128, 0), (0, 255, 0), (255, 128, 0), (255, 128, 0), (255, 128, 0), (255, 128, 0), (255, 128, 0), (255, 128, 0), (255, 128, 0), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 128, 0), (255, 128, 0), (255, 128, 0), (255, 128, 0), (255, 153, 255), (255, 153, 255), (255, 153, 255), (255, 153, 255), (102, 178, 255), (102, 178, 255), (102, 178, 255), (102, 178, 255), (255, 51, 51), (255, 51, 51), (255, 51, 51), (255, 51, 51), (0, 255, 0), (0, 255, 0), (0, 255, 0), (0, 255, 0), (255, 255, 255), (255, 128, 0), (255, 128, 0), (255, 128, 0), (255, 128, 0), (255, 153, 255), (255, 153, 255), (255, 153, 255), (255, 153, 255), (102, 178, 255), (102, 178, 255), (102, 178, 255), (102, 178, 255), (255, 51, 51), (255, 51, 51), (255, 51, 51), (255, 51, 51), (0, 255, 0), (0, 255, 0), (0, 255, 0), (0, 255, 0)]
else:
colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
[0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
[170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
skeleton_link_colors = [(0, 255, 0), (0, 255, 0), (255, 128, 0), (255, 128, 0), (51, 153, 255), (51, 153, 255), (51, 153, 255), (51, 153, 255), (0, 255, 0), (255, 128, 0), (0, 255, 0), (255, 128, 0), (51, 153, 255), (51, 153, 255), (51, 153, 255), (51, 153, 255), (51, 153, 255), (51, 153, 255), (51, 153, 255), (0, 255, 0), (0, 255, 0), (0, 255, 0), (255, 128, 0), (255, 128, 0), (255, 128, 0), (255, 128, 0), (255, 128, 0), (255, 128, 0), (255, 128, 0), (255, 153, 255), (255, 153, 255), (255, 153, 255), (255, 153, 255), (102, 178, 255), (102, 178, 255), (102, 178, 255), (102, 178, 255), (255, 51, 51), (255, 51, 51), (255, 51, 51), (255, 51, 51), (0, 255, 0), (0, 255, 0), (0, 255, 0), (0, 255, 0), (255, 128, 0), (255, 128, 0), (255, 128, 0), (255, 128, 0), (255, 153, 255), (255, 153, 255), (255, 153, 255), (255, 153, 255), (102, 178, 255), (102, 178, 255), (102, 178, 255), (102, 178, 255), (255, 51, 51), (255, 51, 51), (255, 51, 51), (255, 51, 51), (0, 255, 0), (0, 255, 0), (0, 255, 0), (0, 255, 0)]
cmap = matplotlib.cm.get_cmap('hsv')
plt.figure()

img = cv2.imread(imgfile) if type(imgfile) == str else imgfile

color_set = results['colors'] if 'colors' in results else None

-   if 'bbox' in results and ids is None:
+   if 'bbox' in results and ids is None and draw_box:
bboxs = results['bbox']
for j, rect in enumerate(bboxs):
xmin, ymin, xmax, ymax = rect
@@ -325,7 +332,7 @@
(int(length / 2), stickwidth),
int(angle), 0, 360, 1)
            if ids is None:
-               color = colors[i] if color_set is None else colors[
-                   color_set[j] % len(colors)]
+               color = skeleton_link_colors[i] if color_set is None else colors[
+                   color_set[j] % len(colors)]
else:
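To exercise the new 133-keypoint branch without running a model, a shape-only sketch (random keypoints on a dummy canvas; the nested list layout is assumed to mirror what the top-down pipeline emits):

    import numpy as np
    from visualize import visualize_pose

    # (num_person, num_kpts, [x, y, conf])
    skeletons = np.random.rand(1, 133, 3).astype(np.float32)
    skeletons[..., :2] *= 256  # spread the points over the canvas
    results = {'keypoint': [skeletons.tolist(), [[0.9]]]}

    canvas = np.zeros((256, 256, 3), dtype=np.uint8)
    vis = visualize_pose(canvas, results, visual_thresh=0.3, returnimg=True)

Because kpt_nums is read from the skeletons themselves, the same call renders COCO (17), MPII (16) and wholebody (133) results, and with draw_box left at False no detector boxes are drawn.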
5 changes: 3 additions & 2 deletions ppdet/engine/export_utils.py
@@ -55,10 +55,11 @@
'YOLOF': 40,
'METRO_Body': 3,
'DETR': 3,
-   'CLRNet': 3
+   'CLRNet': 3,
+   'VitPose_TopDown_WholeBody': 3
}

-KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
+KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet', 'VitPose_TopDown_WholeBody']
MOT_ARCH = ['JDE', 'FairMOT', 'DeepSORT', 'ByteTrack', 'CenterTrack']
LANE_ARCH = ['CLRNet']

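For context (an assumption from the surrounding entries, since the dict's name sits outside the hunk): the integer map appears to be the per-architecture TensorRT min-subgraph-size table, and KEYPOINT_ARCH is what the engine consults to treat an exported model as a keypoint model. A hypothetical sketch of a consumer:

    TRT_MIN_SUBGRAPH = {'DETR': 3, 'CLRNet': 3, 'VitPose_TopDown_WholeBody': 3}  # dict name assumed
    KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet', 'VitPose_TopDown_WholeBody']

    def export_hints(arch):
        # illustrative only; the real lookup lives in ppdet/engine
        return {
            'min_subgraph_size': TRT_MIN_SUBGRAPH.get(arch, 3),
            'is_keypoint': arch in KEYPOINT_ARCH,
        }

    print(export_hints('VitPose_TopDown_WholeBody'))
    # {'min_subgraph_size': 3, 'is_keypoint': True}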
2 changes: 2 additions & 0 deletions ppdet/modeling/architectures/__init__.py
@@ -26,6 +26,7 @@
from . import keypoint_hrhrnet
from . import keypoint_hrnet
from . import keypoint_vitpose
from . import keypoint_vitpose_wholebody
from . import jde
from . import deepsort
from . import fairmot
@@ -61,6 +62,7 @@
from .keypoint_hrhrnet import *
from .keypoint_hrnet import *
from .keypoint_vitpose import *
from .keypoint_vitpose_wholebody import *
from .jde import *
from .deepsort import *
from .fairmot import *