Datasets

Epic Kitchens


Located at: cseproj154.cse.iitk.ac.in:/dataset1/EPIC-KITCHENS

The largest dataset in first-person (egocentric) vision: multi-faceted, non-scripted recordings in native environments, i.e. the wearers' homes, capturing all daily activities in the kitchen over multiple days. Annotations are collected using a novel 'live' audio commentary approach.

For more details, follow this link. To jump straight to the downloads, follow this link.

Audioset-modified

Located at: cseproj154.cse.iitk.ac.in:/dataset1/Audioset

This dataset has been downloaded and further segregated into classes. The files are as listed on the AudioSet website and have been sorted into 24 classes. It is a dataset for audio event research, containing approximately 0.5 million video clips spanning classes such as Accordion, Cricket, and Meow.
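
As a quick sanity check on the segregated copy, the sketch below counts the clips in each class folder. It assumes one subdirectory per class under an /nfs/154/dataset1/Audioset mount (a hypothetical local path mirroring the SURREAL snippets further down) and that each clip is a separate file; adjust to the actual layout.

import os

# Assumed local mount of cseproj154:/dataset1/Audioset; adjust to your setup.
AUDIOSET_ROOT = '/nfs/154/dataset1/Audioset'

# One subdirectory per class (e.g. Accordion/, Cricket/, Meow/) is assumed.
for class_name in sorted(os.listdir(AUDIOSET_ROOT)):
    class_dir = os.path.join(AUDIOSET_ROOT, class_name)
    if os.path.isdir(class_dir):
        print(f'{class_name}: {len(os.listdir(class_dir))} clips')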

For more details, follow this link. To jump straight to the downloads, follow this link.

MS COCO 2014

Located at: cseproj154.cse.iitk.ac.in:/dataset2/ms_coco_2014

COCO is a large-scale object detection, segmentation, and captioning dataset.

For more details on its features, arrangement, and the direct download links, follow this link.
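
A minimal sketch for reading the 2014 annotations with pycocotools is shown below. The annotation path is an assumption (the standard release keeps the JSON files in an annotations/ folder); check the actual arrangement under /dataset2/ms_coco_2014 before running.

from pycocotools.coco import COCO

# Assumed path: the standard release keeps instances_train2014.json under annotations/.
ANN_FILE = '/nfs/154/dataset2/ms_coco_2014/annotations/instances_train2014.json'

coco = COCO(ANN_FILE)                        # build the index from the JSON file
img_ids = coco.getImgIds()                   # all image ids in the split
print(f'{len(img_ids)} images in train2014')

# Bounding boxes / segmentations for the first image
ann_ids = coco.getAnnIds(imgIds=img_ids[0])
anns = coco.loadAnns(ann_ids)
print(f'{len(anns)} annotations for image {img_ids[0]}')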

SURREAL Dataset

Located at: cseproj154.cse.iitk.ac.in:/dataset2/SURREAL/data

Useful links:
  1. Dataset Page :: summary available here
  2. Project Page :: overview of the results/performance
  3. Code :: details about the dataset are available here
  4. Joints Visualization :: Gül Varol's example of SMPL joints visualization

Dataset folder structure

SURREAL/data/
------------- cmu/  # using MoCap from CMU dataset
-------------------- train/
-------------------- val/ # small subset of test
-------------------- test/
----------------------------  run0/ #50% overlap
----------------------------  run1/ #30% overlap
----------------------------  run2/ #70% overlap
------------------------------------  <sequenceName>/ #e.g. 01_01
--------------------------------------------------  <sequenceName>_c%04d.mp4        # RGB - 240x320 resolution video
--------------------------------------------------  <sequenceName>_c%04d_depth.mat  # Depth
#     depth_1,   depth_2, ...  depth_T [240x320 single] - in meters
--------------------------------------------------  <sequenceName>_c%04d_segm.mat   # Segmentation
#     segm_1,     segm_2, ...   segm_T [240x320 uint8]  - 0 for background and 1..24 for SMPL body parts
--------------------------------------------------  <sequenceName>_c%04d_gtflow.mat # Ground truth optical flow
#     gtflow_1, gtflow_2, ... gtflow_T [240x320x2 single]
--------------------------------------------------  <sequenceName>_c%04d_info.mat   # Remaining annotation
#     bg           [1xT cell]      - names of background image files
#     camDist      [1 single]      - camera distance
#     camLoc       [3x1 single]    - camera location
#     clipNo       [1 double]      - clip number of the full sequence (corresponds to the c%04d part of the file)
#     cloth        [1xT cell]      - names of texture image files
#     gender       [Tx1 uint8]     - gender (0: 'female', 1: 'male')
#     joints2D     [2x24xT single] - 2D coordinates of 24 SMPL body joints on the image pixels
#     joints3D     [3x24xT single] - 3D coordinates of 24 SMPL body joints in real world meters
#     light        [9x100 single]  - spherical harmonics lighting coefficients
#     pose         [72xT single]   - SMPL parameters (axis-angle)
#     sequence     [char]          - <sequenceName>_c%04d
#     shape        [10xT single]   - body shape parameters
#     source       [char]          - 'cmu'
#     stride       [1 uint8]       - percent overlap between clips, 30 or 50 or 70
#     zrot         [Tx1 single]    - rotation in Z (euler angle)
# *** T is the number of frames, mostly 100.
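
Reading the RGB video

The RGB clips are plain .mp4 files, so any video reader works. Below is a minimal sketch using OpenCV (an assumption; imageio or ffmpeg would do equally well), pointed at the same example clip used in the snippets that follow.

import cv2
import numpy as np

VIDEO = '/nfs/154/dataset2/SURREAL/data/cmu/train/run1/ung_132_07/ung_132_07_c0001.mp4'

cap = cv2.VideoCapture(VIDEO)
frames = []
while True:
    ok, frame = cap.read()  # frame is 240x320x3, BGR order
    if not ok:
        break
    frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
cap.release()

rgb = np.stack(frames)  # (T, 240, 320, 3), T is mostly 100
print(rgb.shape)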
    

Extracting data from .mat files

import scipy
import scipy.io
import numpy as np

DATA_PREFIX = '/nfs/154/dataset2/SURREAL/data'
RUN_PREFIX = 'cmu/train/run1'
SEQUENCE_NAME = 'ung_132_07'
CLIP = 1
FILENAME_PREFIX = f'{DATA_PREFIX}/{RUN_PREFIX}/{SEQUENCE_NAME}/{SEQUENCE_NAME}_c{CLIP:04d}'

INFO = 'depth'
FILENAME = f'{FILENAME_PREFIX}_{INFO}.mat'

# e.g. './ung_132_07_c0001_depth.mat'
depth = scipy.io.loadmat(FILENAME)

# loadmat returns a dict with metadata keys ('__header__', '__version__',
# '__globals__') plus one ndarray per frame: 'depth_1', ..., 'depth_T'.
# Keep only the ndarray keys, sort them by frame index, and stack them.
depth_array = np.stack([
    depth[key]
    for key in sorted(
        (k for k in depth if isinstance(depth[k], np.ndarray)),
        key=lambda k: int(k.split('_')[-1]),
    )
])
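
The stacked array has shape (T, 240, 320), with depth values in meters. Since the segmentation and optical-flow files use the same <name>_<frameIndex> key pattern, the same snippet works for them as well; only INFO needs to change:

INFO = 'segm'  # or 'gtflow'
FILENAME = f'{FILENAME_PREFIX}_{INFO}.mat'
segm = scipy.io.loadmat(FILENAME)
# stacking as above yields (T, 240, 320) uint8 labels: 0 background, 1..24 SMPL parts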
      

Extracting joints2D data

import scipy
import scipy.io
import numpy as np

DATA_PREFIX = '/nfs/154/dataset2/SURREAL/data'
RUN_PREFIX = 'cmu/train/run1'
SEQUENCE_NAME = 'ung_132_07'
CLIP = 1
FILENAME_PREFIX = f'{DATA_PREFIX}/{RUN_PREFIX}/{SEQUENCE_NAME}/{SEQUENCE_NAME}_c{CLIP:04d}'

INFO = 'info'
FILENAME = f'{FILENAME_PREFIX}_{INFO}.mat'

info = scipy.io.loadmat(FILENAME)

KEY = 'joints2D'
joints2D_array = info[KEY]

# permute axes to (num_frames, num_joints, num_coords)
joints2D_array = joints2D_array.transpose(2, 1, 0)

# The resulting values are integral pixel coordinates (stored as float),
# bounded by the image size:
IMAGE_SIZE = (240, 320)  # (H, W)
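
Visualizing joints2D on a frame

A quick way to sanity-check the annotations is to overlay the 2D joints on the matching RGB frame. This sketch assumes OpenCV and matplotlib are available and reuses the variables defined above.

import cv2
import matplotlib.pyplot as plt

FRAME_IDX = 0

# Read the matching RGB frame from the clip (same prefix, .mp4 extension)
cap = cv2.VideoCapture(f'{FILENAME_PREFIX}.mp4')
cap.set(cv2.CAP_PROP_POS_FRAMES, FRAME_IDX)
ok, frame = cap.read()
cap.release()

# joints2D_array[FRAME_IDX] is (24, 2); (x, y) order is assumed here,
# swap the columns if the overlay looks transposed.
xs = joints2D_array[FRAME_IDX, :, 0]
ys = joints2D_array[FRAME_IDX, :, 1]

plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
plt.scatter(xs, ys, s=10, c='r')
plt.title(f'{SEQUENCE_NAME}_c{CLIP:04d}, frame {FRAME_IDX}')
plt.savefig('joints2D_overlay.png')  # save to disk (no display needed on the server)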