Source code for mlreco.iotools.parsers.clean_data

import numpy as np
from mlreco.utils.groups import filter_duplicate_voxels_ref, filter_nonimg_voxels


def clean_sparse_data(grp_voxels, grp_data, img_voxels, img_data, meta, precedence):
    """
    Helper that factorizes common cleaning operations required
    when trying to match true sparse3d and cluster3d data products.

    The steps are applied in this order:

    1) lexicographically sort the group data (and the image voxels)
    2) choose only one group per voxel (duplicates are resolved by
       `filter_duplicate_voxels_ref`, which is given `precedence`)
    3) remove voxels from group data that are not in image

    Parameters
    ----------
    grp_voxels: np.ndarray
        2D array of group voxel coordinates, one row per voxel.
    grp_data: np.ndarray
        2D array of values row-aligned with `grp_voxels`; its last
        column is the one handed to the duplicate filter.
    img_voxels: np.ndarray
        2D array of image voxel coordinates, one row per voxel.
    img_data: np.ndarray
        Not used by this function; kept in the signature so existing
        callers keep working.
    meta: larcv::Meta
        Forwarded as-is to `filter_duplicate_voxels_ref`.
    precedence: list
        Forwarded as-is to `filter_duplicate_voxels_ref`.

    Returns
    -------
    grp_voxels: np.ndarray
        Sorted, de-duplicated group voxels that are present in the image.
    grp_data: np.ndarray
        Rows of the input `grp_data` matching the returned `grp_voxels`.
    """
    # Step 1: lexicographically sort group data.
    # np.lexsort treats the LAST key as the primary one, so rows end up
    # ordered by the last coordinate column first.
    perm = np.lexsort(grp_voxels.T)
    grp_voxels = grp_voxels[perm, :]
    grp_data = grp_data[perm]

    # Bring the image voxels into the same ordering before they are compared
    # against the group voxels. `img_data` is deliberately not reordered:
    # nothing below reads it and it is not returned.
    img_voxels = img_voxels[np.lexsort(img_voxels.T), :]

    # Step 2: remove duplicates (keep a single group entry per voxel)
    sel1 = filter_duplicate_voxels_ref(
        grp_voxels,
        grp_data[:, -1],
        meta,
        usebatch=True,
        precedence=precedence,
    )
    inds1 = np.where(sel1)[0]
    grp_voxels = grp_voxels[inds1, :]
    grp_data = grp_data[inds1]

    # Step 3: remove voxels not in image
    sel2 = filter_nonimg_voxels(grp_voxels, img_voxels, usebatch=False)
    inds2 = np.where(sel2)[0]
    grp_voxels = grp_voxels[inds2, :]
    grp_data = grp_data[inds2]

    return grp_voxels, grp_data