Mobile Robot Project 3 - Human Pose


For working with human pose, I recommend PoseFormerV2.

Its demo/vis.py contains many useful functions, including 2D/3D pose visualization and code for transforming pose formats into the H36M joint layout.

 

https://github.com/QitaoZhao/PoseFormerV2

 

GitHub - QitaoZhao/PoseFormerV2: the official implementation of the paper "PoseFormerV2: Exploring Frequency Domain for Efficient and Robust 3D Human Pose Estimation" (github.com).
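As a quick orientation, the snippet below is a minimal sketch (my own, not from the repo's docs) of the two helpers I use most: the bundled HRNet 2D detector and the COCO-to-H36M joint-order conversion. It assumes the same import layout and detector weights as the batch script further below, and 'sample.mp4' is a hypothetical input video; the show2Dpose/show3Dpose drawing helpers come from demo/vis.py and are copied into that script.

import cv2
import numpy as np
from lib.hrnet.gen_kpts import gen_video_kpts as hrnet_pose  # HRNet-based 2D keypoint detector bundled with the repo
from lib.preprocess import h36m_coco_format                  # COCO -> H36M joint-order conversion

video_path = 'sample.mp4'  # hypothetical input video

# COCO-order keypoints: (num_people, num_frames, 17, 2), scores: (num_people, num_frames, 17)
keypoints, scores = hrnet_pose(video_path, det_dim=416, num_peroson=1, gen_output=True)

# re-index the joints into the 17-joint H36M layout that PoseFormerV2 expects
keypoints, scores, valid_frames = h36m_coco_format(keypoints, scores)
print(keypoints.shape, scores.shape)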

 

 

Starting from vis.py, the code below loads every mp4 file in a folder I specify and generates the 2D and 3D poses for each video.

import sys
import argparse
import cv2
from lib.preprocess import h36m_coco_format, revise_kpts
from lib.hrnet.gen_kpts import gen_video_kpts as hrnet_pose
import os
import numpy as np
import torch
import torch.nn as nn
import glob
from tqdm import tqdm
import copy

sys.path.append(os.getcwd())
from common.model_poseformer import PoseTransformerV2 as Model
from common.camera import *

import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.gridspec as gridspec

plt.switch_backend('agg')
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42


def show2Dpose(kps, img):
    connections = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5],
                   [5, 6], [0, 7], [7, 8], [8, 9], [9, 10],
                   [8, 11], [11, 12], [12, 13], [8, 14], [14, 15], [15, 16]]

    LR = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], dtype=bool)

    lcolor = (255, 0, 0)
    rcolor = (0, 0, 255)
    thickness = 3

    for j, c in enumerate(connections):
        start = list(map(int, kps[c[0]]))
        end = list(map(int, kps[c[1]]))
        cv2.line(img, (start[0], start[1]), (end[0], end[1]), lcolor if LR[j] else rcolor, thickness)
        cv2.circle(img, (start[0], start[1]), thickness=-1, color=(0, 255, 0), radius=3)
        cv2.circle(img, (end[0], end[1]), thickness=-1, color=(0, 255, 0), radius=3)

    return img


def show3Dpose(vals, ax):
    ax.view_init(elev=15., azim=70)

    lcolor = (0, 0, 1)
    rcolor = (1, 0, 0)

    I = np.array([0, 0, 1, 4, 2, 5, 0, 7, 8, 8, 14, 15, 11, 12, 8, 9])
    J = np.array([1, 4, 2, 5, 3, 6, 7, 8, 14, 11, 15, 16, 12, 13, 9, 10])
    LR = np.array([0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0], dtype=bool)

    for i in np.arange(len(I)):
        x, y, z = [np.array([vals[I[i], j], vals[J[i], j]]) for j in range(3)]
        ax.plot(x, y, z, lw=2, color=lcolor if LR[i] else rcolor)

    RADIUS = 0.72
    RADIUS_Z = 0.7

    xroot, yroot, zroot = vals[0, 0], vals[0, 1], vals[0, 2]
    ax.set_xlim3d([-RADIUS + xroot, RADIUS + xroot])
    ax.set_ylim3d([-RADIUS + yroot, RADIUS + yroot])
    ax.set_zlim3d([-RADIUS_Z + zroot, RADIUS_Z + zroot])
    ax.set_aspect('auto')  # works fine in matplotlib==2.2.2

    white = (1.0, 1.0, 1.0, 0.0)
    ax.xaxis.set_pane_color(white)
    ax.yaxis.set_pane_color(white)
    ax.zaxis.set_pane_color(white)

    ax.tick_params('x', labelbottom=False)
    ax.tick_params('y', labelleft=False)
    ax.tick_params('z', labelleft=False)


def get_pose2D(video_path, video_name, input_dir):
    cap = cv2.VideoCapture(video_path)
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

    print('\nGenerating 2D pose...')
    keypoints, scores = hrnet_pose(video_path, det_dim=416, num_peroson=1, gen_output=True)
    # keypoints.shape: (1, 138, 17, 2) / scores.shape: (1, 138, 17)
    keypoints, scores, valid_frames = h36m_coco_format(keypoints, scores)
    # append the confidence score as a third channel: (1, num_frames, 17, 3)
    scores = scores.reshape(scores.shape[0], scores.shape[1], scores.shape[2], 1)
    kps_conf = np.concatenate([keypoints, scores], axis=3)
    print('Generating 2D pose successful!')

    output_npz = input_dir + f'{video_name}.npz'
    np.savez_compressed(output_npz, reconstruction=kps_conf)


def img2video(video_path, output_dir):
    cap = cv2.VideoCapture(video_path)
    fps = int(cap.get(cv2.CAP_PROP_FPS)) + 5

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    names = sorted(glob.glob(os.path.join(output_dir + 'pose/', '*.png')))
    img = cv2.imread(names[0])
    size = (img.shape[1], img.shape[0])

    videoWrite = cv2.VideoWriter(output_dir + video_name + '.mp4', fourcc, fps, size)

    for name in names:
        img = cv2.imread(name)
        videoWrite.write(img)

    videoWrite.release()


def showimage(ax, img):
    ax.set_xticks([])
    ax.set_yticks([])
    plt.axis('off')
    ax.imshow(img)


def get_pose3D(video_path, video_name, input_dir, output_dir):
    args, _ = argparse.ArgumentParser().parse_known_args()
    args.embed_dim_ratio, args.depth, args.frames = 32, 4, 243
    args.number_of_kept_frames, args.number_of_kept_coeffs = 27, 27
    args.pad = (args.frames - 1) // 2
    args.previous_dir = 'checkpoint/'
    args.n_joints, args.out_joints = 17, 17

    ## Reload
    model = nn.DataParallel(Model(args=args)).cuda()
    model_dict = model.state_dict()
    # Put the pretrained model of PoseFormerV2 in 'checkpoint/'
    model_path = sorted(glob.glob(os.path.join(args.previous_dir, '27_243_45.2.bin')))[0]

    pre_dict = torch.load(model_path)
    model.load_state_dict(pre_dict['model_pos'], strict=True)
    model.eval()

    ## input: per-video 2D keypoints saved by get_pose2D; drop the confidence channel
    keypoints = np.load(input_dir + f'{video_name}.npz', allow_pickle=True)['reconstruction'][..., :2]

    cap = cv2.VideoCapture(video_path)
    video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    ## 3D
    print('\nGenerating 3D pose...')
    for i in tqdm(range(video_length)):
        ret, img = cap.read()
        if img is None:
            continue
        img_size = img.shape

        ## input frames: a 243-frame window centered on frame i, edge-padded at the ends
        start = max(0, i - args.pad)
        end = min(i + args.pad, len(keypoints[0]) - 1)

        input_2D_no = keypoints[0][start:end + 1]

        left_pad, right_pad = 0, 0
        if input_2D_no.shape[0] != args.frames:
            if i < args.pad:
                left_pad = args.pad - i
            if i > len(keypoints[0]) - args.pad - 1:
                right_pad = i + args.pad - (len(keypoints[0]) - 1)

            input_2D_no = np.pad(input_2D_no, ((left_pad, right_pad), (0, 0), (0, 0)), 'edge')

        joints_left = [4, 5, 6, 11, 12, 13]
        joints_right = [1, 2, 3, 14, 15, 16]

        # input_2D_no += np.random.normal(loc=0.0, scale=5, size=input_2D_no.shape)
        input_2D = normalize_screen_coordinates(input_2D_no, w=img_size[1], h=img_size[0])

        ## horizontal-flip augmentation for test-time averaging
        input_2D_aug = copy.deepcopy(input_2D)
        input_2D_aug[:, :, 0] *= -1
        input_2D_aug[:, joints_left + joints_right] = input_2D_aug[:, joints_right + joints_left]
        input_2D = np.concatenate((np.expand_dims(input_2D, axis=0), np.expand_dims(input_2D_aug, axis=0)), 0)  # (2, 243, 17, 2)

        input_2D = input_2D[np.newaxis, :, :, :, :]
        input_2D = torch.from_numpy(input_2D.astype('float32')).cuda()

        N = input_2D.size(0)

        ## estimation
        output_3D_non_flip = model(input_2D[:, 0])
        output_3D_flip = model(input_2D[:, 1])  # [1, 1, 17, 3]

        output_3D_flip[:, :, :, 0] *= -1
        output_3D_flip[:, :, joints_left + joints_right, :] = output_3D_flip[:, :, joints_right + joints_left, :]

        output_3D = (output_3D_non_flip + output_3D_flip) / 2
        output_3D[:, :, 0, :] = 0
        post_out = output_3D[0, 0].cpu().detach().numpy()

        rot = [0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088]
        rot = np.array(rot, dtype='float32')
        post_out = camera_to_world(post_out, R=rot, t=0)
        post_out[:, 2] -= np.min(post_out[:, 2])

        input_2D_no = input_2D_no[args.pad]

        ## 2D
        image = show2Dpose(input_2D_no, copy.deepcopy(img))

        output_dir_2D = output_dir + 'pose2D/'
        os.makedirs(output_dir_2D, exist_ok=True)
        cv2.imwrite(output_dir_2D + str(('%04d' % i)) + '_2D.png', image)

        ## 3D
        fig = plt.figure(figsize=(9.6, 5.4))
        gs = gridspec.GridSpec(1, 1)
        gs.update(wspace=-0.00, hspace=0.05)
        ax = plt.subplot(gs[0], projection='3d')
        show3Dpose(post_out, ax)

        output_dir_3D = output_dir + 'pose3D/'
        os.makedirs(output_dir_3D, exist_ok=True)
        plt.savefig(output_dir_3D + str(('%04d' % i)) + '_3D.png', dpi=200, format='png', bbox_inches='tight')
        plt.clf()
        plt.close(fig)

    print('Generating 3D pose successful!')

    ## all: stitch the 2D input and the 3D reconstruction side by side
    image_dir = 'results/'
    image_2d_dir = sorted(glob.glob(os.path.join(output_dir_2D, '*.png')))
    image_3d_dir = sorted(glob.glob(os.path.join(output_dir_3D, '*.png')))

    print('\nGenerating demo...')
    for i in tqdm(range(len(image_2d_dir))):
        image_2d = plt.imread(image_2d_dir[i])
        image_3d = plt.imread(image_3d_dir[i])

        ## crop
        edge = (image_2d.shape[1] - image_2d.shape[0]) // 2
        image_2d = image_2d[:, edge:image_2d.shape[1] - edge]

        edge = 130
        image_3d = image_3d[edge:image_3d.shape[0] - edge, edge:image_3d.shape[1] - edge]

        ## show
        font_size = 12
        fig = plt.figure(figsize=(15.0, 5.4))
        ax = plt.subplot(121)
        showimage(ax, image_2d)
        ax.set_title("Input", fontsize=font_size)

        ax = plt.subplot(122)
        showimage(ax, image_3d)
        ax.set_title("Reconstruction", fontsize=font_size)

        ## save
        output_dir_pose = output_dir + 'pose/'
        os.makedirs(output_dir_pose, exist_ok=True)
        plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
        plt.margins(0, 0)
        plt.savefig(output_dir_pose + str(('%04d' % i)) + '_pose.png', dpi=200, bbox_inches='tight')
        plt.clf()
        plt.close(fig)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--video', type=str, default='/home/khw/res/1130/golfdb/data/videos_160', help='folder of input videos')
    parser.add_argument('--gpu', type=str, default='0', help='GPU id')
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    input_dir = './data/pose2D/'
    output_dir = './data/pose3D/'
    os.makedirs(input_dir, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)

    all_videos = sorted(glob.glob(args.video + '/*mp4'))
    num_of_all_videos = len(all_videos)

    for i, video_path in enumerate(all_videos):
        video_name = video_path.split('/')[-1].split('.')[0]
        print('video_name: {}.mp4 ... ({}/{}) video processing.....'.format(video_name, i, num_of_all_videos))
        get_pose2D(video_path, video_name, input_dir)
        get_pose3D(video_path, video_name, input_dir, output_dir)
        # img2video(video_path, output_dir)
        print('Generating demo successful!')
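Assuming the script is saved next to (or in place of) demo/vis.py, it can be run with, for example, python vis.py --video /path/to/your/videos --gpu 0. For each <name>.mp4 it writes the H36M-format 2D keypoints to ./data/pose2D/<name>.npz and the per-frame pose2D/, pose3D/, and combined pose/ renderings under ./data/pose3D/. The snippet below is a small sanity check of the saved keypoints (the .npz file name is hypothetical):

import numpy as np

# hypothetical file written by get_pose2D for a video named example_video.mp4
kps_conf = np.load('./data/pose2D/example_video.npz', allow_pickle=True)['reconstruction']

# shape: (num_people, num_frames, 17, 3) in H36M joint order, with (x, y, confidence) per joint
print(kps_conf.shape)
xy = kps_conf[0, :, :, :2]    # pixel coordinates fed to the 3D model
conf = kps_conf[0, :, :, 2]   # HRNet confidence per keypoint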

 

 

After cloning the golfdb GitHub repository and training the model, change eval.py as follows.

from model import EventDetector
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from dataloader import GolfDB, ToTensor, Normalize
import torch.nn.functional as F
import numpy as np
from util import correct_preds
from tqdm import tqdm


def eval(model, split, seq_length, n_cpu, disp):
    dataset = GolfDB(data_file='data/val_split_{}.pkl'.format(split),
                     vid_dir='data/videos_160/',
                     seq_length=seq_length,
                     transform=transforms.Compose([ToTensor(),
                                                   Normalize([0.485, 0.456, 0.406],
                                                             [0.229, 0.224, 0.225])]),
                     train=False)

    data_loader = DataLoader(dataset,
                             batch_size=1,
                             shuffle=False,
                             num_workers=n_cpu,
                             drop_last=False)

    correct = []

    for i, sample in enumerate(tqdm(data_loader), 0):
        images, labels = sample['images'], sample['labels']
        # full samples do not fit into GPU memory so evaluate sample in 'seq_length' batches
        batch = 0
        while batch * seq_length < images.shape[1]:
            if (batch + 1) * seq_length > images.shape[1]:
                image_batch = images[:, batch * seq_length:, :, :, :]
            else:
                image_batch = images[:, batch * seq_length:(batch + 1) * seq_length, :, :, :]
            logits = model(image_batch.cuda())
            if batch == 0:
                probs = F.softmax(logits.data, dim=1).cpu().numpy()
            else:
                probs = np.append(probs, F.softmax(logits.data, dim=1).cpu().numpy(), 0)
            batch += 1
        _, _, _, _, c = correct_preds(probs, labels.squeeze())
        if disp:
            print(i, c)
        correct.append(c)

    PCE = np.mean(correct)
    return PCE


if __name__ == '__main__':

    splits = [1, 2, 3, 4]
    seq_length = 64
    n_cpu = 6

    model = EventDetector(pretrain=True,
                          width_mult=1.,
                          lstm_layers=1,
                          lstm_hidden=256,
                          bidirectional=True,
                          dropout=False)

    save_dict = torch.load('models/swingnet_1800.pth.tar')
    model.load_state_dict(save_dict['model_state_dict'])
    model.cuda()
    model.eval()

    # evaluate every validation split and report the average PCE
    PCEs = []
    for split in splits:
        PCE = eval(model, split, seq_length, n_cpu, False)
        PCEs.append(PCE)

    for i in range(len(PCEs)):
        print('Dataset Split {} ==> Average PCE: {}'.format(splits[i], PCEs[i]))
    print(f'All Average: {np.mean(PCEs)}')
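With the trained weights saved at models/swingnet_1800.pth.tar, running python eval.py evaluates all four validation splits and prints the PCE (Percentage of Correct Events, GolfDB's swing-event detection metric) for each split, followed by the average over the splits.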
