Project
모바일 로봇 프로젝트 3 - Human Pose
- -
Human Pose를 다루기 위해서
PoseFormerV2를 추천한다
아래 저장소의 demo/vis.py에는 유용한 함수들이 많다.
2D/3D pose 시각화 코드와
H36M 형식에 맞춘 pose format 변환(transformation) 코드 등이 있다.
https://github.com/QitaoZhao/PoseFormerV2
이 vis.py 코드를 수정해서, 원하는 폴더 안의 mp4 파일들을 모두 불러와 2D pose와 3D pose를 만드는 코드는 아래와 같다.
import sys
import argparse
import cv2
from lib.preprocess import h36m_coco_format, revise_kpts
from lib.hrnet.gen_kpts import gen_video_kpts as hrnet_pose
import os
import numpy as np
import torch
import torch.nn as nn
import glob
from tqdm import tqdm
import copy
sys.path.append(os.getcwd())
from common.model_poseformer import PoseTransformerV2 as Model
from common.camera import *
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.gridspec as gridspec
# Render off-screen with the non-interactive Agg backend; every figure in this
# script is written to disk with savefig() and never shown in a window.
plt.switch_backend('agg')
# Font type 42 selects TrueType output for the PDF and PostScript backends
# (per matplotlib's rcParams documentation).
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
def show2Dpose(kps, img):
    """Draw a 17-joint 2D skeleton on ``img`` and return the same image.

    Args:
        kps: Indexable by joint id (0..16); each entry yields at least the
            x and y pixel coordinates, which are truncated to ``int``.
        img: Image array that OpenCV draws on in place.

    Returns:
        ``img`` with 16 bone segments and a filled dot on both ends of each.
    """
    bones = (
        (0, 1), (1, 2), (2, 3), (0, 4), (4, 5),
        (5, 6), (0, 7), (7, 8), (8, 9), (9, 10),
        (8, 11), (11, 12), (12, 13), (8, 14), (14, 15), (15, 16),
    )
    # One flag per bone: True selects the first colour, False the second.
    use_first_color = np.array(
        [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], dtype=bool)
    first_color = (255, 0, 0)
    second_color = (0, 0, 255)
    joint_color = (0, 255, 0)
    line_width = 3

    for (joint_a, joint_b), flag in zip(bones, use_first_color):
        coords_a = [int(v) for v in kps[joint_a]]
        coords_b = [int(v) for v in kps[joint_b]]
        point_a = (coords_a[0], coords_a[1])
        point_b = (coords_b[0], coords_b[1])
        bone_color = first_color if flag else second_color
        cv2.line(img, point_a, point_b, bone_color, line_width)
        cv2.circle(img, point_a, thickness=-1, color=joint_color, radius=3)
        cv2.circle(img, point_b, thickness=-1, color=joint_color, radius=3)

    return img
def show3Dpose(vals, ax):
    """Plot a 17-joint 3D skeleton on the given 3D axes.

    Args:
        vals: Array indexed as ``vals[joint, axis]`` with axis in 0..2.
        ax: A matplotlib 3D axes object; it is modified in place.
    """
    ax.view_init(elev=15., azim=70)

    color_if_set = (0, 0, 1)
    color_if_clear = (1, 0, 0)

    # Each bone connects joint bone_from[k] to joint bone_to[k].
    bone_from = np.array([0, 0, 1, 4, 2, 5, 0, 7, 8, 8, 14, 15, 11, 12, 8, 9])
    bone_to = np.array([1, 4, 2, 5, 3, 6, 7, 8, 14, 11, 15, 16, 12, 13, 9, 10])
    color_flag = np.array(
        [0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0], dtype=bool)

    for src, dst, flag in zip(bone_from, bone_to, color_flag):
        xs, ys, zs = (np.array([vals[src, k], vals[dst, k]]) for k in range(3))
        ax.plot(xs, ys, zs, lw=2, color=color_if_set if flag else color_if_clear)

    # Fixed-size viewing box centred on joint 0.
    RADIUS = 0.72
    RADIUS_Z = 0.7
    xroot, yroot, zroot = vals[0, 0], vals[0, 1], vals[0, 2]
    ax.set_xlim3d([-RADIUS + xroot, RADIUS + xroot])
    ax.set_ylim3d([-RADIUS + yroot, RADIUS + yroot])
    ax.set_zlim3d([-RADIUS_Z + zroot, RADIUS_Z + zroot])
    ax.set_aspect('auto')  # works fine in matplotlib==2.2.2

    # Fully transparent white panes behind the skeleton.
    transparent_white = (1.0, 1.0, 1.0, 0.0)
    for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
        axis.set_pane_color(transparent_white)

    # Hide the tick labels on all three axes.
    ax.tick_params('x', labelbottom=False)
    ax.tick_params('y', labelleft=False)
    ax.tick_params('z', labelleft=False)
def get_pose2D(video_path, video_name, input_dir):
    """Detect 2D keypoints for one video and save them as ``<video_name>.npz``.

    The detector output is converted with ``h36m_coco_format`` and stored
    under the key ``'reconstruction'`` with the per-joint score appended as
    an extra last channel.

    Args:
        video_path: Path of the video handed to the HRNet keypoint detector.
        video_name: Base name (no extension) used for the output file.
        input_dir: Directory that receives ``<video_name>.npz``.
    """
    # The original opened a cv2.VideoCapture here only to read an unused
    # width/height and never released it; that handle leak is removed.
    print('\nGenerating 2D pose...')
    # NOTE: 'num_peroson' (sic) is the keyword as spelled by the callee.
    keypoints, scores = hrnet_pose(video_path, det_dim=416, num_peroson=1, gen_output=True)
    keypoints, scores, _valid_frames = h36m_coco_format(keypoints, scores)

    # Append the score as a trailing channel next to the coordinates.
    scores = np.expand_dims(scores, axis=-1)
    kps_conf = np.concatenate([keypoints, scores], axis=3)
    print('Generating 2D pose successful!')

    if input_dir:
        os.makedirs(input_dir, exist_ok=True)
    # os.path.join works whether or not input_dir ends with a separator.
    output_npz = os.path.join(input_dir, f'{video_name}.npz')
    np.savez_compressed(output_npz, reconstruction=kps_conf)
def img2video(video_path, output_dir):
    """Assemble the PNG frames in ``<output_dir>/pose/`` into an .mp4 video.

    The output is written to ``<output_dir>/<video name>.mp4`` where the
    name is the base name of ``video_path`` without its extension.

    Args:
        video_path: Source video; used for its frame rate and base name.
        output_dir: Directory holding the ``pose/`` frames and the result.

    Raises:
        FileNotFoundError: If no PNG frames are found.
    """
    # Derive the name locally instead of relying on a module-level
    # ``video_name`` that only exists when run via the __main__ loop.
    video_name = os.path.splitext(os.path.basename(video_path))[0]

    frame_paths = sorted(glob.glob(os.path.join(output_dir, 'pose', '*.png')))
    if not frame_paths:
        raise FileNotFoundError(
            'no PNG frames found in ' + os.path.join(output_dir, 'pose'))

    cap = cv2.VideoCapture(video_path)
    try:
        # The +5 offset on the source frame rate is kept from the original.
        fps = int(cap.get(cv2.CAP_PROP_FPS)) + 5
    finally:
        cap.release()

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    first_frame = cv2.imread(frame_paths[0])
    size = (first_frame.shape[1], first_frame.shape[0])  # (width, height)

    writer = cv2.VideoWriter(
        os.path.join(output_dir, video_name + '.mp4'), fourcc, fps, size)
    try:
        for frame_path in frame_paths:
            writer.write(cv2.imread(frame_path))
    finally:
        # Always finalize the container, even if a frame fails to load.
        writer.release()
def showimage(ax, img):
    """Show ``img`` on ``ax`` with no ticks and the pyplot axis turned off."""
    for clear_ticks in (ax.set_xticks, ax.set_yticks):
        clear_ticks([])
    # plt.axis acts on pyplot's current axes, not necessarily on ``ax``.
    plt.axis('off')
    ax.imshow(img)
def _pose_window(sequence, center, pad):
    """Return the ``2 * pad + 1`` frame window of ``sequence`` around ``center``.

    Positions that fall outside the sequence are filled by repeating the
    nearest available frame (``np.pad`` with ``'edge'``).
    """
    last = len(sequence) - 1
    start = max(0, center - pad)
    end = min(center + pad, last)
    window = sequence[start:end + 1]
    left_pad = max(0, pad - center)
    right_pad = max(0, center + pad - last)
    if left_pad or right_pad:
        window = np.pad(window, ((left_pad, right_pad), (0, 0), (0, 0)), 'edge')
    return window


def _lift_window_to_3D(model, window_2D, img_shape, joints_left, joints_right):
    """Run the model on one 2D window (plus its mirrored copy) and return 3D joints."""
    input_2D = normalize_screen_coordinates(window_2D, w=img_shape[1], h=img_shape[0])

    # Flip augmentation: negate x and swap the left/right joint groups.
    input_2D_aug = copy.deepcopy(input_2D)
    input_2D_aug[:, :, 0] *= -1
    input_2D_aug[:, joints_left + joints_right] = input_2D_aug[:, joints_right + joints_left]

    # Shape (1, 2, frames, joints, 2): index 0 = original, index 1 = flipped.
    batch = np.stack([input_2D, input_2D_aug])[np.newaxis]
    batch = torch.from_numpy(batch.astype('float32')).cuda()

    with torch.no_grad():  # inference only; avoids building autograd graphs
        output_3D_non_flip = model(batch[:, 0])
        output_3D_flip = model(batch[:, 1])

        # Undo the flip on the mirrored prediction, then average both.
        output_3D_flip[:, :, :, 0] *= -1
        output_3D_flip[:, :, joints_left + joints_right, :] = \
            output_3D_flip[:, :, joints_right + joints_left, :]
        output_3D = (output_3D_non_flip + output_3D_flip) / 2
        output_3D[:, :, 0, :] = 0  # pin joint 0 to the origin

    post_out = output_3D[0, 0].cpu().numpy()

    # Fixed camera rotation (4 values, passed as R), kept from the original.
    rot = np.array([0.1407056450843811, -0.1500701755285263,
                    -0.755240797996521, 0.6223280429840088], dtype='float32')
    post_out = camera_to_world(post_out, R=rot, t=0)
    post_out[:, 2] -= np.min(post_out[:, 2])  # lowest point sits at z = 0
    return post_out


def _compose_demo_frames(dir_2D, dir_3D, dir_pose):
    """Write side-by-side "Input" / "Reconstruction" PNGs into ``dir_pose``."""
    paths_2D = sorted(glob.glob(os.path.join(dir_2D, '*.png')))
    paths_3D = sorted(glob.glob(os.path.join(dir_3D, '*.png')))
    os.makedirs(dir_pose, exist_ok=True)
    font_size = 12

    # zip() stops at the shorter list, so a count mismatch cannot IndexError.
    pairs = list(zip(paths_2D, paths_3D))
    for i, (path_2D, path_3D) in enumerate(tqdm(pairs)):
        image_2d = plt.imread(path_2D)
        image_3d = plt.imread(path_3D)

        ## crop
        # Centre-crop wide frames to a square. Clamp at 0 so portrait frames
        # are left alone instead of being sliced with a negative edge.
        edge = max(0, (image_2d.shape[1] - image_2d.shape[0]) // 2)
        image_2d = image_2d[:, edge:image_2d.shape[1] - edge]

        edge = 130
        image_3d = image_3d[edge:image_3d.shape[0] - edge, edge:image_3d.shape[1] - edge]

        ## show
        fig = plt.figure(figsize=(15.0, 5.4))
        ax = plt.subplot(121)
        showimage(ax, image_2d)
        ax.set_title("Input", fontsize=font_size)

        ax = plt.subplot(122)
        showimage(ax, image_3d)
        ax.set_title("Reconstruction", fontsize=font_size)

        ## save
        plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
        plt.margins(0, 0)
        plt.savefig(os.path.join(dir_pose, ('%04d' % i) + '_pose.png'),
                    dpi=200, bbox_inches='tight')
        plt.clf()
        plt.close(fig)


def get_pose3D(video_path, video_name=None, input_dir=None, output_dir=None):
    """Lift saved 2D keypoints of one video to 3D and render demo frames.

    Two call forms are supported:

    * ``get_pose3D(video_path, video_name, input_dir, output_dir)`` reads
      ``<input_dir>/<video_name>.npz`` and writes the ``pose2D/``,
      ``pose3D/`` and ``pose/`` folders under ``<output_dir>/<video_name>/``
      so frames of different videos never mix.
    * ``get_pose3D(video_path, output_dir)`` (legacy) reads
      ``<output_dir>/input_2D/keypoints.npz`` and writes the three folders
      directly under ``output_dir``.

    Args:
        video_path: Video whose frames are read for the 2D overlays.
        video_name: Base name of the keypoint file; defaults to the base
            name of ``video_path`` without extension.
        input_dir: Directory containing ``<video_name>.npz``.
        output_dir: Root directory for the rendered frames.

    Raises:
        ValueError: If ``output_dir`` is missing or the keypoint file holds
            no frames.
        FileNotFoundError: If the pretrained checkpoint is missing.
    """
    # Legacy positional form get_pose3D(video_path, output_dir): the second
    # positional argument is really the output directory.
    if input_dir is None and output_dir is None and video_name is not None:
        output_dir, video_name = video_name, None
    if output_dir is None:
        raise ValueError('output_dir is required')

    if input_dir is None:
        keypoints_path = os.path.join(output_dir, 'input_2D', 'keypoints.npz')
        result_dir = output_dir
    else:
        if video_name is None:
            video_name = os.path.splitext(os.path.basename(video_path))[0]
        keypoints_path = os.path.join(input_dir, f'{video_name}.npz')
        result_dir = os.path.join(output_dir, video_name)

    # Model hyper-parameters. A plain Namespace is enough; the original
    # parsed sys.argv with an empty parser just to obtain one.
    args = argparse.Namespace()
    args.embed_dim_ratio, args.depth, args.frames = 32, 4, 243
    args.number_of_kept_frames, args.number_of_kept_coeffs = 27, 27
    args.pad = (args.frames - 1) // 2
    args.previous_dir = 'checkpoint/'
    args.n_joints, args.out_joints = 17, 17

    ## Reload
    # Put the pretrained model of PoseFormerV2 in 'checkpoint/'
    model_path = os.path.join(args.previous_dir, '27_243_45.2.bin')
    if not os.path.isfile(model_path):
        raise FileNotFoundError('pretrained checkpoint not found: ' + model_path)
    model = nn.DataParallel(Model(args=args)).cuda()
    pre_dict = torch.load(model_path)
    model.load_state_dict(pre_dict['model_pos'], strict=True)
    model.eval()

    ## input
    keypoints = np.load(keypoints_path, allow_pickle=True)['reconstruction']
    # Keep only (x, y): the saved array may carry a confidence score as a
    # third channel, while the model input is (frames, joints, 2).
    keypoints = keypoints[..., :2]
    person_kpts = keypoints[0]
    if len(person_kpts) == 0:
        raise ValueError('no keypoint frames in ' + keypoints_path)

    joints_left = [4, 5, 6, 11, 12, 13]
    joints_right = [1, 2, 3, 14, 15, 16]

    output_dir_2D = os.path.join(result_dir, 'pose2D')
    output_dir_3D = os.path.join(result_dir, 'pose3D')
    output_dir_pose = os.path.join(result_dir, 'pose')
    os.makedirs(output_dir_2D, exist_ok=True)
    os.makedirs(output_dir_3D, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    ## 3D
    print('\nGenerating 3D pose...')
    try:
        for i in tqdm(range(video_length)):
            ret, img = cap.read()
            if img is None:
                continue
            if i - args.pad >= len(person_kpts):
                # The window would contain no real keypoint frame at all.
                break

            ## input frames
            input_2D_no = _pose_window(person_kpts, i, args.pad)
            post_out = _lift_window_to_3D(
                model, input_2D_no, img.shape, joints_left, joints_right)

            ## 2D
            # The centre of the window is the pose of the current frame.
            image = show2Dpose(input_2D_no[args.pad], copy.deepcopy(img))
            cv2.imwrite(os.path.join(output_dir_2D, ('%04d' % i) + '_2D.png'), image)

            ## 3D
            fig = plt.figure(figsize=(9.6, 5.4))
            gs = gridspec.GridSpec(1, 1)
            gs.update(wspace=-0.00, hspace=0.05)
            ax = plt.subplot(gs[0], projection='3d')
            show3Dpose(post_out, ax)
            plt.savefig(os.path.join(output_dir_3D, ('%04d' % i) + '_3D.png'),
                        dpi=200, format='png', bbox_inches='tight')
            plt.clf()
            plt.close(fig)
    finally:
        cap.release()

    print('Generating 3D pose successful!')

    ## all
    print('\nGenerating demo...')
    _compose_demo_frames(output_dir_2D, output_dir_3D, output_dir_pose)
if __name__ == "__main__":
    # Batch driver: run 2D detection and 3D lifting for every .mp4 in a folder.
    parser = argparse.ArgumentParser()
    parser.add_argument('--video', type=str, default='/home/khw/res/1130/golfdb/data/videos_160',
                        help='directory containing the input .mp4 videos')
    parser.add_argument('--gpu', type=str, default='0',
                        help='GPU id(s) exported as CUDA_VISIBLE_DEVICES')
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    input_dir = './data/pose2D/'
    output_dir = './data/pose3D/'
    os.makedirs(input_dir, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)

    all_videos = sorted(glob.glob(os.path.join(args.video, '*.mp4')))
    num_of_all_videos = len(all_videos)
    if not all_videos:
        print('No .mp4 files found in {}'.format(args.video))

    # Count from 1 so the last video is reported as (N/N), not (N-1/N).
    for i, video_path in enumerate(all_videos, start=1):
        # basename/splitext is separator-agnostic and keeps dots inside names.
        video_name = os.path.splitext(os.path.basename(video_path))[0]
        print('video_name: {}.mp4 ... ({}/{}) video processing.....'.format(video_name, i, num_of_all_videos))
        get_pose2D(video_path, video_name, input_dir)
        get_pose3D(video_path, video_name, input_dir, output_dir)
        # img2video(video_path, output_dir)
        print('Generating demo successful!')
golfdb 깃헙 코드를 clone해서 학습시킨 다음,
eval.py 코드를 아래와 같이 바꾼다.
from model import EventDetector
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from dataloader import GolfDB, ToTensor, Normalize
import torch.nn.functional as F
import numpy as np
from util import correct_preds
from tqdm import tqdm
def eval(model, split, seq_length, n_cpu, disp):
    """Evaluate ``model`` on one GolfDB validation split and return the mean score.

    Args:
        model: Network called on image chunks moved to the GPU.
        split: Split id used to pick ``data/val_split_<split>.pkl``.
        seq_length: Maximum number of frames per forward pass.
        n_cpu: Number of DataLoader worker processes.
        disp: If true, print the sample index and its per-sample result.

    Returns:
        ``np.mean`` over the last value returned by ``correct_preds`` for
        every sample (named PCE by the caller).
    """
    dataset = GolfDB(data_file='data/val_split_{}.pkl'.format(split),
                     vid_dir='data/videos_160/',
                     seq_length=seq_length,
                     transform=transforms.Compose([ToTensor(),
                                                   Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
                     train=False)

    data_loader = DataLoader(dataset,
                             batch_size=1,
                             shuffle=False,
                             num_workers=n_cpu,
                             drop_last=False)

    correct = []

    # Inference only: skip autograd bookkeeping to save GPU memory.
    with torch.no_grad():
        # tqdm's second positional argument is ``desc``, not a start index,
        # so the stray ``0`` from the original call is dropped.
        for i, sample in enumerate(tqdm(data_loader)):
            images, labels = sample['images'], sample['labels']

            # full samples do not fit into GPU memory so evaluate sample in
            # 'seq_length' batches; slicing clips the last, shorter chunk.
            chunk_probs = []
            for start in range(0, images.shape[1], seq_length):
                image_batch = images[:, start:start + seq_length, :, :, :]
                logits = model(image_batch.cuda())
                chunk_probs.append(F.softmax(logits, dim=1).cpu().numpy())

            # One concatenate instead of repeated np.append (which recopies
            # the accumulated array on every chunk).
            probs = np.concatenate(chunk_probs, axis=0)

            _, _, _, _, c = correct_preds(probs, labels.squeeze())
            if disp:
                print(i, c)
            correct.append(c)

    PCE = np.mean(correct)
    return PCE
if __name__ == '__main__':
    # Evaluate one trained checkpoint on all four validation splits.
    splits = [1, 2, 3, 4]
    seq_length = 64
    n_cpu = 6

    model = EventDetector(pretrain=True,
                          width_mult=1.,
                          lstm_layers=1,
                          lstm_hidden=256,
                          bidirectional=True,
                          dropout=False)

    save_dict = torch.load('models/swingnet_1800.pth.tar')
    model.load_state_dict(save_dict['model_state_dict'])
    model.cuda()
    model.eval()

    PCEs = [eval(model, split, seq_length, n_cpu, False) for split in splits]

    # The original wrote ``print('...', format(split[i], PCEs[i]))``: a comma
    # instead of a dot, and ``split`` (an int) instead of ``splits``, which
    # raises TypeError. Pair each split id with its score directly.
    for split, PCE in zip(splits, PCEs):
        print('Dataset Split {} ==> Average PCE: {}'.format(split, PCE))
    print(f'All Average: {np.mean(PCEs)}')
'Project' 카테고리의 다른 글
AI캐디 프로젝트 2 - Action Recognition (0) | 2024.12.10 |
---|---|
AI캐디 프로젝트 1 - Flask 실시간 webcam - SAM2 (2) | 2024.12.08 |
모바일 로봇 프로젝트 2 - SAM2 (3) | 2024.11.30 |
모바일 로봇 프로젝트 1 - YOLO (0) | 2024.11.30 |
Langchain 프롬프트 메이커 및 미니 프로젝트 (0) | 2024.09.24 |
Contents
소중한 공감 감사합니다