Source code for mlreco.utils.deghosting

import numpy as np
import torch
from scipy.spatial.distance import cdist
from sklearn.cluster import DBSCAN
from torch_cluster import knn


[docs]def compute_rescaled_charge(input_data, deghost_mask, last_index = 6, batch_col = 0): """ Computes rescaled charge after deghosting Note ---- This function should work on Numpy arrays or Torch tensors. Parameters ---------- input_data: np.ndarray or torch.Tensor Shape (N, 4+num_features) where 4 corresponds to batch_id,x,y,z deghost_mask: np.ndarray or torch.Tensor Shape (N,), N_deghost is the predicted deghosted voxel count last_index: int, default 6 Indexes where hit-related features start @ 4 + deghost_input_features batch_col: int, default 0 Returns ------- np.ndarray or torch.Tensor Shape (N_deghost,) contains rescaled charge array for input data. Includes deghosted mask already. """ if torch.is_tensor(input_data): unique = torch.unique empty = lambda n: torch.empty(n, dtype=torch.long, device=hit_charges.device) sum = lambda x: torch.sum(x, dim=1) else: unique = np.unique empty = np.empty sum = lambda x: np.sum(x, axis=1) batches = unique(input_data[:, batch_col]) hit_charges = input_data[deghost_mask, last_index :last_index+3] hit_ids = input_data[deghost_mask, last_index+3:last_index+6] multiplicity = empty(hit_charges.shape, ) for b in batches: batch_mask = input_data[deghost_mask, batch_col] == b _, inverse, counts = unique(hit_ids[batch_mask], return_inverse=True, return_counts=True) multiplicity[batch_mask] = counts[inverse].reshape(-1,3) pmask = hit_ids > -1 charges = sum((hit_charges*pmask)/multiplicity)/sum(pmask) # Take average estimate return charges
[docs]def adapt_labels_knn(result, label_seg, label_clustering, num_classes=5, batch_column=0, coords_column_range=(1, 4), true_mask=None, use_numpy=False): """ Returns new cluster labels that have the same size as the input w/ ghost points. Points predicted as nonghost but that are true ghosts get the cluster label of the closest cluster. Points that are true ghosts and predicted as ghosts get "empty" (-1) values. Note ---- Uses GPU version from `torch_cluster.knn` to speed up the label adaptation computation. Parameters ---------- result: dict label_seg: list of torch.Tensor label_clustering: list of torch.Tensor num_classes: int, default 5 Semantic classes count. batch_column: int, default 0 coords_column_range: tuple, default (1, 4) true_mask: torch.Tensor, default None True nonghost mask. If None, will use the intersection of predicted nonghost and true nonghost. This option is useful to do "cheat ghost predictions" (i.e. mimic deghosting predictions using true ghost mask, to run later stages of the chain independently of the deghosting stage). Returns ------- np.ndarray shape: (input w/ ghost points, label_clusters_features) See Also -------- adapt_labels, adapt_labels_numpy """ complete_label_clustering = [] c1, c2 = coords_column_range if use_numpy: unique = np.unique ones = np.ones argmax = lambda x: np.argmax(x, axis=1) where = np.where concatenate0 = lambda x: np.concatenate(x, axis=1) concatenate1 = lambda x: np.concatenate(x, axis=0) compute_neighbor = lambda X_true, X_pred: cdist(X_pred[:, c1:c2], X_true[:, c1:c2]).argmin(axis=1) compute_distances = lambda X_true, X_pred: np.amax(np.abs(X_true[:, c1:c2] - X_pred[:, c1:c2]), axis=1) make_float = lambda x : x get_shape = lambda x, y: (x.shape[0], y.shape[1]) else: unique = lambda x: x.int().unique() ones = torch.ones argmax = lambda x: torch.argmax(x, dim=1) where = torch.where concatenate0 = lambda x: torch.cat(x, dim=1).float() concatenate1 = lambda x: torch.cat(x, dim=0) compute_neighbor = lambda X_true, X_pred: knn(X_true[:, c1:c2].float(), X_pred[:, c1:c2].float(), 1)[1] compute_distances = lambda X_true, X_pred: torch.amax(torch.abs(X_true[:, c1:c2] - X_pred[:, c1:c2]), dim=1) make_float = lambda x: x.float() get_shape = lambda x, y: (x.size(0), y.size(1)) if true_mask is not None: assert true_mask.shape[0] == label_seg[0].shape[0] c3 = max(c2, batch_column+1) for i in range(len(label_seg)): coords = label_seg[i][:, :c3] label_c = [] for batch_id in unique(coords[:, batch_column]): batch_mask = coords[:, batch_column] == batch_id batch_coords = coords[batch_mask] batch_clustering = label_clustering[i][label_clustering[i][:, batch_column] == batch_id] if len(batch_clustering) == 0: continue if true_mask is None: nonghost_mask = argmax(result['ghost'][i][batch_mask]) == 0 else: nonghost_mask = true_mask[batch_mask] # Prepare new labels new_label_clustering = -1. * ones(get_shape(batch_coords, batch_clustering)) if (not use_numpy) and torch.cuda.is_available(): new_label_clustering = new_label_clustering.cuda() new_label_clustering[:, :c3] = batch_coords # Loop over predicted semantics # print(result['segmentation'][i].shape, batch_mask.shape, batch_mask.sum()) if result['segmentation'][i].shape[0] == batch_mask.shape[0]: semantic_pred = argmax(result['segmentation'][i][batch_mask]) else: # adapt_labels was called from analysis tools (see below deghost_labels_and_predictions) # the problem in this case is that `segmentation` has already been deghosted semantic_pred = argmax(result['segmentation_noghost'][i][batch_mask]) # Include true nonghost voxels by default when they have the right semantic prediction true_pred = label_seg[i][batch_mask, -1] new_label_clustering[(true_pred < num_classes)] = make_float(batch_clustering) new_label_clustering[(true_pred < num_classes) & (semantic_pred != true_pred), c3:] = -1. for semantic in unique(semantic_pred): semantic_mask = semantic_pred == semantic if true_mask is not None: continue # Select voxels predicted as nonghost, but true ghosts (or true nonghost, but wrong semantic prediction) # mask = nonghost_mask & (label_seg[i][:, -1][batch_mask] == num_classes) & semantic_mask mask = nonghost_mask & (true_pred != semantic) & semantic_mask mask = where(mask)[0] #print(semantic, np.intersect1d(mask.cpu().numpy(), indices)) # Now we need a special treatment for these, if there are any. if batch_coords[mask].shape[0] == 0: continue tagged_voxels_count = 1 # to get the loop started X_true = batch_clustering[batch_clustering[:, -1] == semantic] if X_true.shape[0] == 0: continue X_pred = batch_coords[mask] while tagged_voxels_count > 0 and X_pred.shape[0] > 0: # print(batch_id, "while", X_true.shape, X_pred.shape, tagged_voxels_count) #neighbors = knn(X_true[:, c1:c2].float(), X_pred[:, c1:c2].float(), 1) #_, d = neighbors[0], neighbors[1] d = compute_neighbor(X_true, X_pred) # compute Chebyshev distance between predicted and true # distances = torch.amax(torch.abs(X_true[neighbors[1], c1:c2] - X_pred[neighbors[0], c1:c2]), dim=1) distances = compute_distances(X_true[d], X_pred) select_mask = distances <= 1 tagged_voxels_count = select_mask.sum() if tagged_voxels_count > 0: # We assign label of closest true nonghost voxel to those within Chebyshev distance 1 additional_label_clustering = concatenate0([X_pred[select_mask], X_true[d[select_mask], c3:]]) new_label_clustering[mask[select_mask]] = additional_label_clustering mask = mask[~select_mask] # Update for next iteration X_true = additional_label_clustering X_pred = X_pred[~select_mask] # Now we save - need only to keep predicted nonghost voxels. label_c.append(new_label_clustering[nonghost_mask]) #print(new_label_clustering[nonghost_mask][indices]) #print(new_label_clustering[indices]) label_c = concatenate1(label_c) #print("ignored 0 ", (label_c[:, -1] == -1).sum()) # Also run DBSCAN on track true clusters # We want to avoid true track clusters broken in two having the same cluster id label # Note: we assume we are adapting either cluster or kinematics labels, # for which cluster id and group id columns are 5 and 6 respectively. cluster_id_col = 5 track_label = 1 dbscan = DBSCAN(eps=1.1, min_samples=1, metric='chebyshev') track_mask = label_c[:, -1] == track_label for batch_id in unique(coords[:, batch_column]): batch_mask = label_c[:, batch_column] == batch_id # We want to select cluster ids for track-like particles batch_clustering = label_clustering[i][(label_clustering[i][:, batch_column] == batch_id) & (label_clustering[i][:, -1] == track_label)] if len(batch_clustering) == 0: continue # Reset counter for each batch entry # we need to avoid labeling a cluster with an already existing cluster id (maybe not track) cluster_count = unique(label_clustering[i][(label_clustering[i][:, batch_column] == batch_id)][:, cluster_id_col]).max()+1 for c in unique(batch_clustering[:, cluster_id_col]): if c < 0: continue cluster_mask = label_c[batch_mask, cluster_id_col] == c if cluster_mask.sum() == 0: continue coordinates = label_c[batch_mask][cluster_mask, c1:c2] if not isinstance(label_c, np.ndarray): coordinates = coordinates.detach().cpu().numpy() l = dbscan.fit(coordinates).labels_ # Unclustered voxels can keep the label -1 if not isinstance(label_c, np.ndarray): l = torch.tensor(l, device = label_c.device).float() l[l > -1] = l[l > -1] + cluster_count label_c[where(batch_mask)[0][cluster_mask], cluster_id_col] = l cluster_count = int(l.max() + 1) complete_label_clustering.append(label_c) return complete_label_clustering
[docs]def adapt_labels(*args, **kwargs): """ Kept for backward compatibility, to deprecate soon. See Also -------- adapt_labels_knn, adapt_labels_numpy """ return adapt_labels_knn(*args, **kargs)
[docs]def adapt_labels_numpy(*args, **kwargs): """ Numpy version of `adapt_labels`. See Also -------- adapt_labels, adapt_labels_knn """ return adapt_labels_knn(*args, **kwargs, use_numpy=True)
[docs]def deghost_labels_and_predictions(data_blob, result): ''' Given dictionaries <data_blob> and <result>, apply deghosting to uresnet predictions and labels for use in later reconstruction stages. Warning ------- Modifies in place the input data and result dictionaries. Note ---- Used in analysis tools (decorator). Parameters ---------- data_blob: dict result: dict ''' result['ghost_mask'] = [ result['ghost'][i].argmax(axis=1) == 0 \ for i in range(len(result['ghost']))] if 'segment_label' in data_blob: data_blob['true_ghost_mask'] = [ data_blob['segment_label'][i][:, -1] < 5 \ for i in range(len(data_blob['segment_label']))] data_blob['input_data_noghost'] = data_blob['input_data'] if 'segment_label' in data_blob: data_blob['input_data_trueghost'] = [data_blob['input_data'][i][mask] \ for i, mask in enumerate(data_blob['true_ghost_mask'])] data_blob['input_data'] = [data_blob['input_data'][i][mask] \ for i, mask in enumerate(result['ghost_mask'])] if 'cluster_label' in data_blob \ and data_blob['cluster_label'] is not None: # Save the clust_data before deghosting data_blob['cluster_label_noghost'] = data_blob['cluster_label'] data_blob['cluster_label'] = adapt_labels_numpy( result, data_blob['segment_label'], data_blob['cluster_label']) if 'seg_prediction' in result \ and result['seg_prediction'] is not None: result['seg_prediction'] = [ result['seg_prediction'][i][result['ghost_mask'][i]] \ for i in range(len(result['seg_prediction']))] if 'segmentation' in result \ and result['segmentation'] is not None: result['segmentation_noghost'] = result['segmentation'] result['segmentation'] = [ result['segmentation'][i][result['ghost_mask'][i]] \ for i in range(len(result['segmentation']))] if 'kinematics_label' in data_blob \ and data_blob['kinematics_label'] is not None: data_blob['kinematics_label'] = adapt_labels_numpy( result, data_blob['segment_label'], data_blob['kinematics_label']) # This needs to come last - in adapt_labels seg_label is the original one if 'segment_label' in data_blob \ and data_blob['segment_label'] is not None: data_blob['segment_label_noghost'] = data_blob['segment_label'] data_blob['segment_label'] = [ data_blob['segment_label'][i][result['ghost_mask'][i]] \ for i in range(len(data_blob['segment_label']))]