问题遇到的现象和发生背景
我使用这个软件自带的坤坤视频就没有问题,但是用自己的视频来跑就生成失败了
问题相关代码,请勿粘贴截图
运行结果及报错内容
我的解答思路和尝试过的方法
我想要达到的结果
import time
import cv2
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.animation import FuncAnimation, writers
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from mpl_toolkits.mplot3d import Axes3D
from tqdm import tqdm
from common.utils import read_video
def ckpt_time(ckpt=None, display=0, desc=''):
    """
    Take a timing checkpoint, or measure the time elapsed since an earlier one.

    :param ckpt: timestamp returned by a previous call; any falsy value (None, 0) starts a new checkpoint
    :param display: when truthy, print the elapsed time prefixed with ``desc``
    :param desc: text printed in front of the elapsed time
    :return: the current ``time.time()`` when ``ckpt`` is falsy, otherwise the tuple
             ``(elapsed_seconds, current_time)``
    """
    # Read the clock exactly once so the printed value, the returned elapsed
    # time and the returned timestamp all describe the same instant.
    now = time.time()
    if not ckpt:
        return now

    elapsed = now - float(ckpt)
    if display:
        print(desc + ' consume time {:0.4f}'.format(elapsed))
    return elapsed, now
def set_equal_aspect(ax, data):
    """
    Plot the eight corners of a white cube enclosing ``data`` so a 3D axis scales all axes alike.

    The cube is centred on the midpoint of the data's extent along each axis and
    its edge length equals the largest of the three extents.

    :param ax: 3D axis; only its ``plot`` method is used
    :param data: array whose last dimension holds x, y, z at indices 0, 1, 2
    """
    X, Y, Z = data[..., 0], data[..., 1], data[..., 2]

    extents = np.array([X.max() - X.min(), Y.max() - Y.min(), Z.max() - Z.min()])
    half_edge = 0.5 * extents.max()

    # Sign pattern (-1 / +1) of the eight cube corners, one plane per axis.
    signs = np.mgrid[-1:2:2, -1:2:2, -1:2:2]
    corner_coords = [
        half_edge * signs[k].flatten() + 0.5 * (values.max() + values.min())
        for k, values in enumerate((X, Y, Z))
    ]

    for corner_x, corner_y, corner_z in zip(*corner_coords):
        ax.plot([corner_x], [corner_y], [corner_z], 'w')
def downsample_tensor(X, factor):
    """
    Average every ``factor`` consecutive entries along the first axis of ``X``.

    Trailing entries that do not fill a complete group of ``factor`` are dropped.

    :param X: array to downsample along its first axis
    :param factor: number of consecutive entries averaged into one
    :return: array of group means; the remaining axes keep their shape
    """
    trailing_dims = X.shape[1:]
    # Largest multiple of ``factor`` that fits into the first axis.
    usable = (X.shape[0] // factor) * factor
    grouped = X[:usable].reshape(-1, factor, *trailing_dims)
    return np.mean(grouped, axis=1)
#[i:j]从下标i到下标j,截取序列s中的元素。i省略或者None时,就是0。
#np.mean():求均值。axis=1,计算每一行的均值;axis=0,计算每一列的均值。
#reshape(-1,a,b)。-1:表示该维度的大小由元素总数和其余维度(a、b)自动推算。
#*X.shape。使用*号作为可变参数列表,就可以在方法内对参数进行调用,也就是解包(元组)。
#fps:画面每秒传输的帧数;bitrate:码率;azim:相对方位角;viewport:可视窗口;
def render_animation(keypoints, poses, skeleton, fps, bitrate, azim, output, viewport,
                     limit=-1, downsample=1, size=6, input_video_path=None, input_video_skip=0):
    """
    Render the input frames with 2D keypoints next to one 3D skeleton view per entry of ``poses``.

    The animation is saved to ``output``. The supported output modes are:
     -- 'filename.mp4': export through matplotlib's 'ffmpeg' writer.
     -- 'filename.gif': export through matplotlib's 'imagemagick' writer.

    :param keypoints: 2D keypoints indexed by frame; ``keypoints[i].T`` is unpacked into the scatter
                      plot (assumed shape (frames, joints, 2) -- confirm with the caller)
    :param poses: mapping of panel title -> 3D pose array indexed as [frame, joint, xyz]
    :param skeleton: object providing ``parents()`` and ``joints_right()``
    :param fps: frame rate of the animation; divided by ``downsample`` when downsampling is active
    :param bitrate: bitrate handed to the ffmpeg writer (mp4 output only)
    :param azim: azimuth passed to ``view_init`` of every 3D axis
    :param output: target file name, must end with '.mp4' or '.gif'
    :param viewport: (width, height) of the black background used when no input video is given
    :param limit: maximum number of frames to render; values < 1 mean "all available frames"
    :param downsample: temporal averaging factor applied to frames, keypoints and poses when > 1
    :param size: figure height in inches; the width is ``size`` per panel
    :param input_video_path: video decoded with ``read_video``; None renders a black background
    :param input_video_skip: forwarded to ``read_video`` as ``skip``
    :raises ValueError: if ``output`` does not end with '.mp4' or '.gif'
    """
    # Validate the target first so an unsupported extension fails before any decoding or rendering.
    save_as_mp4 = output.endswith('.mp4')
    if not save_as_mp4 and not output.endswith('.gif'):
        raise ValueError('Unsupported output format (only .mp4 and .gif are supported)')

    plt.ioff()
    fig = plt.figure(figsize=(size * (1 + len(poses)), size))
    pbar = None
    try:
        # Left-most panel: the input frame with the 2D keypoints drawn on top.
        ax_in = fig.add_subplot(1, 1 + len(poses), 1)
        ax_in.get_xaxis().set_visible(False)
        ax_in.get_yaxis().set_visible(False)
        ax_in.set_axis_off()
        ax_in.set_title('Input')

        # Touch Axes3D so the import is actually referenced
        # (presumably needed for the '3d' projection to be registered -- confirm).
        _ = Axes3D.__class__.__name__

        ax_3d = []
        lines_3d = []
        trajectories = []
        radius = 1.7
        for index, (title, data) in enumerate(poses.items()):
            # One 3D panel per pose sequence.
            ax = fig.add_subplot(1, 1 + len(poses), index + 2, projection='3d')
            ax.view_init(elev=15., azim=azim)
            ax.set_xlim3d([-radius / 2, radius / 2])
            ax.set_zlim3d([0, radius])
            ax.set_ylim3d([-radius / 2, radius / 2])
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_zticklabels([])
            ax.dist = 12.5
            ax.set_xlabel('X Label')
            ax.set_ylabel('Y Label')
            ax.set_zlabel('Z Label')
            ax.set_title(title)
            ax_3d.append(ax)
            lines_3d.append([])
            # x/y path of joint 0, used to keep the view centred on the subject.
            trajectories.append(data[:, 0, [0, 1]])
        poses = list(poses.values())

        # Decode video
        if input_video_path is None:
            # Black background
            all_frames = np.zeros((keypoints.shape[0], viewport[1], viewport[0]), dtype='uint8')
        else:
            all_frames = list(read_video(input_video_path, fps=None, skip=input_video_skip))
            # Never keep more video frames than there are keypoint frames.
            effective_length = min(keypoints.shape[0], len(all_frames))
            all_frames = all_frames[:effective_length]

        if downsample > 1:
            keypoints = downsample_tensor(keypoints, downsample)
            all_frames = downsample_tensor(np.array(all_frames), downsample).astype('uint8')
            for idx in range(len(poses)):
                poses[idx] = downsample_tensor(poses[idx], downsample)
                trajectories[idx] = downsample_tensor(trajectories[idx], downsample)
            fps /= downsample

        initialized = False
        image = None
        points = None
        if limit < 1:
            limit = len(all_frames)
        else:
            limit = min(limit, len(all_frames))

        parents = skeleton.parents()
        right_joints = skeleton.joints_right()
        pbar = tqdm(total=limit)

        def update_video(i):
            nonlocal initialized, image, points

            # Re-centre every 3D view on joint 0's x/y position for this frame.
            for n, ax in enumerate(ax_3d):
                center_x = trajectories[n][i, 0]
                center_y = trajectories[n][i, 1]
                ax.set_xlim3d([-radius / 2 + center_x, radius / 2 + center_x])
                ax.set_ylim3d([-radius / 2 + center_y, radius / 2 + center_y])

            if not initialized:
                # First frame: create the artists that later frames only update.
                image = ax_in.imshow(all_frames[i], aspect='equal')

                for j, j_parent in enumerate(parents):
                    if j_parent == -1:
                        continue

                    col = 'red' if j in right_joints else 'black'
                    for n, ax in enumerate(ax_3d):
                        pos = poses[n][i]
                        lines_3d[n].append(ax.plot([pos[j, 0], pos[j_parent, 0]],
                                                   [pos[j, 1], pos[j_parent, 1]],
                                                   [pos[j, 2], pos[j_parent, 2]], zdir='z', c=col))

                points = ax_in.scatter(*keypoints[i].T, 5, color='red', edgecolors='white', zorder=10)
                initialized = True
            else:
                image.set_data(all_frames[i])

                for j, j_parent in enumerate(parents):
                    if j_parent == -1:
                        continue

                    # Bone j is stored at index j - 1; this relies on joint 0 being
                    # the only joint whose parent is -1.
                    for n, ax in enumerate(ax_3d):
                        pos = poses[n][i]
                        bone = lines_3d[n][j - 1][0]
                        bone.set_xdata([pos[j, 0], pos[j_parent, 0]])
                        bone.set_ydata([pos[j, 1], pos[j_parent, 1]])
                        bone.set_3d_properties([pos[j, 2], pos[j_parent, 2]], zdir='z')

                points.set_offsets(keypoints[i])

            pbar.update()

        anim = FuncAnimation(fig, update_video, frames=limit, interval=1000.0 / fps, repeat=False)
        if save_as_mp4:
            Writer = writers['ffmpeg']
            writer = Writer(fps=fps, metadata={}, bitrate=bitrate)
            anim.save(output, writer=writer)
        else:
            anim.save(output, dpi=60, writer='imagemagick')
    finally:
        # Release the progress bar and this specific figure even when decoding or saving fails.
        if pbar is not None:
            pbar.close()
        plt.close(fig)
def render_animation_test(keypoints, poses, skeleton, fps, bitrate, azim, output, viewport, limit=-1, downsample=1,
                          size=6, input_video_frame=None, input_video_skip=0, num=None):
    """
    Render one frame: the input image on the left and a 3D skeleton on the right.

    The skeleton is the last pose of ``poses['Reconstruction']``. The rendered figure is
    shown in an OpenCV window named 'im' and returned as an array.

    :param keypoints: not used by this function; kept so the signature stays unchanged
    :param poses: mapping that must contain the key 'Reconstruction'; its last element is
                  indexed as [joint, xyz]
    :param skeleton: object providing ``parents()`` and ``joints_right()``
    :param azim: azimuth passed to ``view_init`` of the 3D axis
    :param input_video_frame: image shown in the left panel
    :param fps, bitrate, output, viewport, limit, downsample, size, input_video_skip, num:
           not used by this function; kept so the signature stays unchanged
    :return: uint8 array of shape (height, width, 3) holding the rendered figure in RGB order
    """
    t0 = ckpt_time()
    fig = plt.figure(figsize=(12, 6))
    try:
        canvas = FigureCanvas(fig)
        ax_img = fig.add_subplot(121)
        ax_img.imshow(input_video_frame)

        # 3D
        ax = fig.add_subplot(122, projection='3d')
        ax.view_init(elev=15., azim=azim)
        # Axis ranges
        radius = 1.7
        ax.set_xlim3d([-radius / 2, radius / 2])
        ax.set_zlim3d([0, radius])
        ax.set_ylim3d([-radius / 2, radius / 2])
        try:
            ax.set_aspect('equal')
        except NotImplementedError:
            # Some matplotlib releases do not implement 'equal' for 3D axes.
            ax.set_aspect('auto')
        # Hide tick labels
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_zticklabels([])
        ax.dist = 7.5
        ax.set_xlabel('X Label')
        ax.set_ylabel('Y Label')
        ax.set_zlabel('Z Label')

        parents = skeleton.parents()
        right_joints = skeleton.joints_right()
        pos = poses['Reconstruction'][-1]
        # Timing checkpoints; ckpt_time stays silent because display is left at its default.
        _, t1 = ckpt_time(t0, desc='1 ')

        # Draw one line per bone (joint -> parent); joints without a parent (-1) are skipped.
        for j, j_parent in enumerate(parents):
            if j_parent == -1:
                continue

            col = 'red' if j in right_joints else 'black'
            ax.plot([pos[j, 0], pos[j_parent, 0]],
                    [pos[j, 1], pos[j_parent, 1]],
                    [pos[j, 2], pos[j_parent, 2]], zdir='z', c=col)

        width, height = fig.get_size_inches() * fig.get_dpi()
        _, t2 = ckpt_time(t1, desc='2 ')
        canvas.draw()  # draw the canvas, cache the renderer
        # np.frombuffer replaces the deprecated binary mode of np.fromstring; the copy keeps
        # the returned image writable, as it was before.
        image = np.frombuffer(canvas.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3).copy()
        cv2.imshow('im', image)
        cv2.waitKey(5)
        ckpt_time(t2, desc='3 ')
        return image
    finally:
        # A new figure is created on every call; close it so figures do not accumulate.
        plt.close(fig)