Source code for mlreco.utils.groups

# utility function to reconcile groups data with energy deposition and 5-types data:
# problem: parse_cluster3d and parse_sparse3d will not output voxels in same order
# additionally, some voxels in groups data do not deposit energy, so do not appear in images
# also, some voxels have more than one group.
# plan is to put in a function to:
# 1) lexicographically sort group data (images are lexicographically sorted)
# 2) remove voxels from group data that are not in image
# 3) choose only one group per voxel (by lexicographic order)
# WARNING: (3) is certainly not a canonical choice

import numpy as np
import torch
from larcv import larcv

[docs]def get_group_types(particle_v, meta, point_type="3d"): """ Gets particle classes for voxel groups """ if point_type not in ["3d", "xy", "yz", "zx"]: raise Exception("Point type not supported in PPN I/O.") gt_types = [] for particle in particle_v: pdg_code = abs(particle.pdg_code()) prc = particle.creation_process() # Determine point type if (pdg_code == 2212): gt_type = 0 # proton elif pdg_code != 22 and pdg_code != 11: gt_type = 1 elif pdg_code == 22: gt_type = 2 else: if prc == "primary" or prc == "nCapture" or prc == "conv": gt_type = 2 # em shower elif prc == "muIoni" or prc == "hIoni": gt_type = 3 # delta elif prc == "muMinusCaptureAtRest" or prc == "muPlusCaptureAtRest" or prc == "Decay": gt_type = 4 # michel else: gt_type = -1 # not well defined gt_types.append(gt_type) return np.array(gt_types)
[docs]def filter_duplicate_voxels(data, usebatch=True): """ return array that will filter out duplicate voxels Only first instance of voxel will appear Assume data[:4] = [x,y,z,batchid] Assumes data is lexicographically sorted in x,y,z,batch order """ # set number of cols to look at if usebatch: k = 4 else: k = 3 n = data.shape[0] ret = np.empty(n, dtype=bool) for i in range(1,n): if np.all(data[i-1,:k] == data[i,:k]): # duplicate voxel ret[i-1] = False else: # new voxel ret[i-1] = True ret[n-1] = True # ret[0] = True # for i in range(n-1): # if np.all(data[i,:k] == data[i+1,:k]): # # duplicate voxel # ret[i+1] = False # else: # # new voxel # ret[i+1] = True return ret
[docs]def filter_duplicate_voxels_ref(data, reference, meta, usebatch=True, precedence=[1,2,0,3,4]): """ return array that will filter out duplicate voxels Sort with respect to a reference and following the specified precedence order Assume data[:4] = [x,y,z,batchid] Assumes data is lexicographically sorted in x,y,z,batch order """ # set number of cols to look at if usebatch: k = 4 else: k = 3 n = data.shape[0] ret = np.full(n, True, dtype=bool) duplicates = {} for i in range(1,n): if np.all(data[i-1,:k] == data[i,:k]): x, y, z = int(data[i,0]), int(data[i,1]), int(data[i,2]) id = meta.index(x, y, z) if id in duplicates: duplicates[id].append(i) else: duplicates[id] = [i-1, i] for d in duplicates.values(): ref = np.array([precedence.index(r) for r in reference[d]]) args = np.argsort(-ref, kind='mergesort') # Must preserve of order of duplicates ret[np.array(d)[args[:-1]]] = False return ret
[docs]def filter_nonimg_voxels(data_grp, data_img, usebatch=True): """ return array that will filter out voxels in data_grp that are not in data_img ASSUME: data_grp and data_img are lexicographically sorted in x,y,z,batch order ASSUME: all points in data_img are also in data_grp ASSUME: all voxels in data are unique """ # set number of cols to look at if usebatch: k = 4 else: k = 3 ngrp = data_grp.shape[0] nimg = data_img.shape[0] igrp = 0 iimg = 0 ret = np.empty(ngrp, dtype=bool) # return array while igrp < ngrp and iimg < nimg: if np.all(data_grp[igrp,:k] == data_img[iimg,:k]): # voxel is in both data ret[igrp] = True igrp += 1 iimg += 1 else: # voxel is in data_grp, but not data_img ret[igrp] = False igrp += 1 # need to go through rest of data_grp if any left while igrp < ngrp: ret[igrp] = False igrp += 1 return ret
[docs]def filter_group_data(data_grp, data_img): """ return return array that will permute and filter out voxels so that data_grp and data_img have same voxel locations 1) lexicographically sort group data (images are lexicographically sorted) 2) remove voxels from group data that are not in image 3) choose only one group per voxel (by lexicographic order) WARNING: (3) is certainly not a canonical choice """ # step 1: lexicographically sort group data perm = np.lexsort(data_grp[:,:-1:].T) data_grp = data_grp[perm,:] # step 2: remove duplicates sel1 = filter_duplicate_voxels(data_grp) inds1 = np.where(sel1)[0] data_grp = data_grp[inds1,:] # step 3: remove voxels not in image sel2 = filter_nonimg_voxels(data_grp, data_img) inds2 = np.where(sel2)[0] return perm[inds1[inds2]]
[docs]def process_group_data(data_grp, data_img): """ return processed group data 1) lexicographically sort group data (images are lexicographically sorted) 2) remove voxels from group data that are not in image 3) choose only one group per voxel (by lexicographic order) WARNING: (3) is certainly not a canonical choice """ data_grp_np = data_grp.cpu().detach().numpy() data_img_np = data_img.cpu().detach().numpy() inds = filter_group_data(data_grp_np, data_img_np) return data_grp[inds,:]
[docs]def get_interaction_id(particle_v, num_ancestor_loop=1): ''' A function to sort out interaction ids. Note that this assumes cluster_id==particle_id. Inputs: - particle_v (array) : larcv::EventParticle.as_vector() - num_ancestor_loop (int): number of ancestor loops (default 1) Outputs: - interaction_ids: a numpy array with the shape (n,) ''' ########################################################################## # sort out the interaction ids using the information of ancestor vtx info # then loop over to make sure the ancestor particles having the same interaction ids ########################################################################## # get the particle ancestor vtx array first # and the track ids # and the ancestor track ids ancestor_vtxs = [] track_ids = [] ancestor_track_ids = np.empty(0, dtype=int) for particle in particle_v: ancestor_vtx = [ particle.ancestor_x(), particle.ancestor_y(), particle.ancestor_z(), ] ancestor_vtxs.append(ancestor_vtx) track_ids.append(particle.track_id()) ancestor_track_ids = np.append(ancestor_track_ids, [particle.ancestor_track_id()]) ancestor_vtxs = np.asarray(ancestor_vtxs) # get the list of unique interaction vertexes interaction_vtx_list = np.unique( ancestor_vtxs, axis=0, ).tolist() # loop over each cluster to assign interaction ids interaction_ids = np.ones(particle_v.size(), dtype=int)*(-1) for clust_id in range(particle_v.size()): # get the interaction id from the unique list (index is the id) interaction_ids[clust_id] = interaction_vtx_list.index( ancestor_vtxs[clust_id].tolist() ) # Loop over ancestor, making sure particle having the same interaction id as ancestor for _ in range(num_ancestor_loop): for clust_id, ancestor_track_id in enumerate(ancestor_track_ids): if ancestor_track_id in track_ids: ancestor_clust_index = track_ids.index(ancestor_track_id) interaction_ids[clust_id] = interaction_ids[ancestor_clust_index] return interaction_ids
[docs]def get_nu_id(cluster_event, particle_v, interaction_ids, particle_mpv=None): ''' A function to sorts interactions into nu or not nu (0 for cosmic, 1 for nu). CAVEAT: Dirty way to sort out nu_ids Assuming only one nu interaction is generated and first group/cluster belongs to such interaction Inputs: - cluster_event (larcv::EventClusterVoxel3D): (N) Array of cluster tensors - particle_v vector: larcv::EventParticle.as_vector() - interaction_id: a numpy array with shape (n, 1) where 1 is interaction id - (optional) particle_mpv: vector of particles from mpv generator, used to work around the lack of proper interaction id for the time being. Outputs: - nu_id: a numpy array with the shape (n,1) ''' # initiate the nu_id nu_id = np.zeros(len(particle_v)) if particle_mpv is None: # find the first cluster that has nonzero size sizes = np.array([cluster_event.as_vector()[i].as_vector().size() for i in range(len(particle_v))]) nonzero = np.where(sizes > 0)[0] if not len(nonzero): return nu_id first_clust_id = nonzero[0] # the corresponding interaction id nu_interaction_id = interaction_ids[first_clust_id] # Get clust indexes for interaction_id = nu_interaction_id inds = np.where(interaction_ids == nu_interaction_id)[0] # Check whether there're at least two clusts coming from 'primary' process num_primary = 0 for i, part in enumerate(particle_v): if i not in inds: continue create_prc = part.creation_process() parent_pdg = part.parent_pdg_code() if create_prc == 'primary' or parent_pdg == 111: num_primary += 1 # if there is nu interaction if num_primary > 1: nu_id[inds] = 1 elif len(particle_mpv) > 0: # Find mpv particles is_mpv = np.zeros((len(particle_v),)) # mpv_ids = [p.id() for p in particle_mpv] mpv_pdg = np.array([p.pdg_code() for p in particle_mpv]).reshape((-1,)) mpv_energy = np.array([p.energy_init() for p in particle_mpv]).reshape((-1,)) for idx, part in enumerate(particle_v): # track_id - 1 in `particle_pcluster_tree` corresponds to id (or track_id) in `particle_mpv_tree` # if (part.track_id()-1) in mpv_ids or (part.ancestor_track_id()-1) in mpv_ids: # FIXME the above was wrong I think. close = np.isclose(part.energy_init()*1e-3, mpv_energy) pdg = part.pdg_code() == mpv_pdg if (close & pdg).any(): is_mpv[idx] = 1. # else: # print("fake cosmic", part.pdg_code(), part.shape(), part.creation_process(), part.track_id(), part.ancestor_track_id(), mpv_ids) is_mpv = is_mpv.astype(bool) nu_interaction_ids = np.unique(interaction_ids[is_mpv]) for idx, x in enumerate(nu_interaction_ids): # # Check whether there're at least two clusts coming from 'primary' process # num_primary = 0 # for part in particle_v[interaction_ids == x]: # if part.creation_process() == 'primary': # num_primary += 1 # if num_primary > 1: nu_id[interaction_ids == x] = 1 # Only tells whether neutrino or not # nu_id[interaction_ids == x] = idx return nu_id
type_labels = { 22: 0, # photon 11: 1, # e- -11: 1, # e+ 13: 2, # mu- -13: 2, # mu+ 211: 3, # pi+ -211: 3, # pi- 2212: 4, # protons }
[docs]def get_particle_id(particles_v, nu_ids, include_mpr=False, include_secondary=False): ''' Function that gives one of five labels to particles of particle species predictions. This function ensures: - Particles that do not originate from an MPV are labeled -1, unless the include_mpr flag is set to true - Secondary particles (includes Michel/delta and neutron activity) are labeled -1, unless the include_secondary flag is true - All shower daughters are labeled the same as their primary. This makes sense as otherwise an electron primary gets overruled by its many photon daughters (voxel-wise majority vote). This can lead to problems as, if an electron daughter is not clustered with the primary, it is labeled electron, which is counter-intuitive. This is handled downstream with the high_purity flag. - Particles that are not in the list target are labeled -1 Inputs: - particles_v (array of larcv::Particle) : (N) LArCV Particle objects - nu_ids: a numpy array with shape (n, 1) where 1 is neutrino id (0 if not an MPV) - include_mpr: include MPR (cosmic-like) particles to PID target - include_secondary: include secondary particles into the PID target Outputs: - array: (N) list of group ids ''' particle_ids = np.empty(len(nu_ids)) primary_ids = get_group_primary_id(particles_v, nu_ids, include_mpr) for i in range(len(particle_ids)): # If the primary ID is invalid, assign invalid if primary_ids[i] < 0: particle_ids[i] = -1 continue # If secondary particles are not included and primary_id < 1, assign invalid if not include_secondary and primary_ids[i] < 1: particle_ids[i] = -1 continue # If the particle type exists in the predefined list, assign group_id = int(particles_v[i].group_id()) t = int(particles_v[group_id].pdg_code()) if t in type_labels.keys(): particle_ids[i] = type_labels[t] else: particle_ids[i] = -1 return particle_ids
[docs]def get_shower_primary_id(cluster_event, particles_v): ''' Function that assigns valid primary tags to shower fragments. This could be handled somewhere else (e.g. SUPERA) Inputs: - cluster_event (larcv::EventClusterVoxel3D): (N) Array of cluster tensors - particles_v (array of larcv::Particle) : (N) LArCV Particle objects Outputs: - array: (N) list of group ids ''' # Loop over the list of particles group_ids = np.array([p.group_id() for p in particles_v]) primary_ids = np.empty(particles_v.size(), dtype=np.int32) for i, p in enumerate(particles_v): # If the particle is a track or a low energy cluster, it is not a primary shower fragment if p.shape() == 1 or p.shape() == 4: primary_ids[i] = 0 continue # If a particle is a Delta or a Michel, it is a primary shower fragment if p.shape() == 2 or p.shape() == 3: primary_ids[i] = 1 continue # If the shower fragment originates from nuclear activity, it is not a primary process = p.creation_process() parent_pdg_code = abs(p.parent_pdg_code()) if 'Inelastic' in process or 'Capture' in process or parent_pdg_code == 2112: primary_ids[i] = 0 continue # If a shower group's parent fragment has size zero, there is no valid primary in the group gid = int(p.group_id()) parent_size = cluster_event.as_vector()[gid].as_vector().size() if not parent_size: primary_ids[i] = 0 continue # If a shower group's parent fragment is not the first in time, there is no valid primary in the group idxs = np.where(group_ids == gid)[0] clust_times = np.array([particles_v[int(j)].first_step().t() for j in idxs]) min_id = np.argmin(clust_times) if idxs[min_id] != gid : primary_ids[i] = 0 continue # If all conditions are met, label shower fragments which have identical ID and group ID as primary primary_ids[i] = int(gid == i) return primary_ids
[docs]def get_group_primary_id(particles_v, nu_ids=None, include_mpr=True): ''' Function that assigns valid primary tags to particle groups. This could be handled somewhere else (e.g. SUPERA) Inputs: - particles_v (array of larcv::Particle) : (N) LArCV Particle objects - nu_ids: a numpy array with shape (n, 1) where 1 is neutrino id (0 if not an MPV) - include_mpr: include MPR (cosmic-like) particles to primary target Outputs: - array: (N) list of group ids ''' # Loop over the list of particles primary_ids = np.empty(particles_v.size(), dtype=np.int32) for i, p in enumerate(particles_v): # If MPR particles are not included and the nu_id < 1, assign invalid if not include_mpr and nu_ids[i] < 1: primary_ids[i] = -1 continue # If the ancestor particle is unknown (no creation process), assign invalid (TODO: fix in supera) if not p.ancestor_creation_process(): primary_ids[i] = -1 continue # If the particle is not a shower or a track, it is not a primary if p.shape() != larcv.kShapeShower and p.shape() != larcv.kShapeTrack: primary_ids[i] = 0 continue # If the particle group originates from nuclear activity, it is not a primary gid = int(p.group_id()) process = particles_v[gid].creation_process() parent_pdg_code = abs(particles_v[gid].parent_pdg_code()) ancestor_pdg_code = abs(particles_v[gid].ancestor_pdg_code()) if 'Inelastic' in process or 'Capture' in process or parent_pdg_code == 2112 or ancestor_pdg_code == 2112: primary_ids[i] = 0 continue # If the parent is a pi0, make sure that it is a primary pi0 (pi0s are not stored in particle list) if parent_pdg_code == 111 and ancestor_pdg_code != 111: primary_ids[i] = 0 continue # If the parent ID of the primary particle in the group is the same as the group ID, it is a primary primary_ids[i] = int(particles_v[gid].parent_id() == gid) return primary_ids