Source code for analysis.algorithms.selections.michel_electrons

from collections import OrderedDict
from turtle import update
from sklearn.decomposition import PCA

from analysis.classes.ui import FullChainEvaluator, FullChainPredictor
from analysis.decorator import evaluate
from analysis.algorithms.calorimetry import compute_track_length, compute_particle_direction

from pprint import pprint
import time
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.cluster import DBSCAN
import pandas as pd


[docs]def yz_calibrations(yz_calib, points, edep, bin_size=10, voxel_size=0.3, spatial_size=6144): """ Apply YZ calibration factors from CSV file. Parameters ========== yz_calib: pd.DataFrame points: (N, 3) np.ndarray edep: (N,) np.ndarray bin_size: float, in cm voxel_size: float, in cm spatial_size: int, in voxels Returns ======= float calibrated sum of edep """ if yz_calib is None: return edep.sum() xbins = np.arange(0, spatial_size, bin_size/voxel_size) ybins = np.arange(0, spatial_size, bin_size/voxel_size) zbins = np.arange(0, spatial_size, bin_size/voxel_size) xidx = np.digitize(points[:, 0], xbins) yidx = np.digitize(points[:, 1], ybins) zidx = np.digitize(points[:, 2], zbins) df = pd.DataFrame({'x': points[:, 0], 'y': points[:, 1], 'z': points[:, 2], 'edep': edep}) df['xbin'] = xidx df['ybin'] = yidx df['zbin'] = zidx df['tpc'] = 'EE' df.loc[(df['xbin'] > 40) & (df['xbin'] <= 83), 'tpc'] = 'EW' df.loc[(df['xbin'] > 83) & (df['xbin'] <= 125), 'tpc'] = 'WE' df.loc[df['xbin'] > 125, 'tpc'] = 'WW' df = pd.merge(df, yz_calib, on=['tpc', 'ybin', 'zbin'], how='left') df.loc[np.isnan(df['scale']), 'scale'] = 1. return (df['edep'] * df['scale']).sum()
[docs]def get_bounding_box(points): return { "points_min_x": points[:, 0].min(), "points_max_x": points[:, 0].max(), "points_min_y": points[:, 1].min(), "points_max_y": points[:, 1].max(), "points_min_z": points[:, 2].min(), "points_max_z": points[:, 2].max() }
[docs]def is_attached_at_edge(points1, points2, attached_threshold=5, one_pixel=5, ablation_radius=15, ablation_min_samples=5, return_dbscan_cluster_count=False): distances = cdist(points1, points2) is_attached = np.min(distances) < attached_threshold # check for the edge now Michel_min, MIP_min = np.unravel_index(np.argmin(distances), distances.shape) min_coords = points2[MIP_min, :] ablated_cluster = points2[np.linalg.norm(points2-min_coords, axis=1) > ablation_radius] new_cluster_count, old_cluster_count = 0, 1 if ablated_cluster.shape[0] > 0: dbscan = DBSCAN(eps=one_pixel, min_samples=ablation_min_samples) old_cluster = dbscan.fit(points2).labels_ new_cluster = dbscan.fit(ablated_cluster).labels_ # If only one cluster is left, we were at the edge # Account for old cluster count in case track is fragmented # and put together by Track GNN old_cluster_count = len(np.unique(old_cluster[old_cluster>-1])) new_cluster_count = len(np.unique(new_cluster[new_cluster>-1])) is_edge = (old_cluster_count - new_cluster_count) <= 1 and old_cluster_count >= new_cluster_count else: # if nothing is left after ablating, this is a really small muon... calling it the edge is_edge = True if return_dbscan_cluster_count: return is_attached and is_edge, new_cluster_count, old_cluster_count return is_attached and is_edge
[docs]def find_cosmic_angle(muon, michel, endpoint, radius=30): """ Parameters ========== muon: Particle michel: Particle """ # Find muon end direction pca = PCA(n_components=2) neighborhood = (cdist(muon.points, [endpoint]) < radius).reshape((-1,)) if neighborhood.sum() < 3: return -1 * np.ones((3,)), -1. * np.ones((3,)) coords = pca.fit_transform(muon.points[neighborhood]) muon_dir = pca.components_[0, :] # Make sure muon direction is in the right sense far_point = muon.points[neighborhood][cdist(muon.points[neighborhood], [endpoint]).argmax()] vec = endpoint - far_point if np.dot(vec, muon_dir) < 0: muon_dir *= -1 # Find Michel start direction pca = PCA(n_components=2) neighborhood = (cdist(michel.points, [endpoint]) < radius).reshape((-1,)) if neighborhood.sum() < 3: return -1 * np.ones((3,)), -1. * np.ones((3,)) coords = pca.fit_transform(michel.points[neighborhood]) michel_dir = pca.components_[0, :] # Make sure Michel direction is in the right sense far_point = michel.points[neighborhood][cdist(michel.points[neighborhood], [endpoint]).argmax()] vec = far_point - endpoint if np.dot(vec, michel_dir) < 0: michel_dir *= -1 return muon_dir, michel_dir
[docs]def find_true_cosmic_angle(muon, michel, particles_asis_voxels, radius=30): #true_michel, true_muon = None, None #for p in particles_asis_voxels: # if p.id() == muon.id: # true_muon = p # if p.id() == michel.id: # true_michel = p if michel is None or muon is None: return -1 * np.ones((3,)), -1 * np.ones((3,)) #endpoint = [true_michel.x(), true_michel.y(), true_michel.z()] #endpoint = [true_muon.end_position().x(), true_muon.end_position().y(), true_muon.end_position().z()] endpoint_id = cdist(michel.points, muon.points).argmin() michel_id, muon_id = np.unravel_index(endpoint_id, (len(michel.points), len(muon.points))) endpoint = muon.points[muon_id] return find_cosmic_angle(muon, michel, endpoint, radius=radius)
[docs]@evaluate(['michels_pred', 'michels_true'], mode='per_batch') def michel_electrons(data_blob, res, data_idx, analysis_cfg, cfg): """ Selection of Michel electrons ============================= Configuration ============= Under `processor_cfg`, you can specify the following parameters: Output ====== """ try: yz_calib = pd.read_csv("/sdf/group/neutrino/ldomine/ICARUS_Calibrations/v09_62_00/tpc_yz_correction_data.csv") except Exception as e: print("Unable to load YZ calibration csv.") print(e) yz_calib = None # # ====== Configuration ====== # michels, true_michels = [], [] deghosting = analysis_cfg['analysis']['deghosting'] processor_cfg = analysis_cfg['analysis'].get('processor_cfg', {}) spatial_size = processor_cfg['spatial_size'] # Whether we are running on MC or data data = processor_cfg.get('data', False) # Avoid hardcoding labels michel_label = processor_cfg.get('michel_label', 2) track_label = processor_cfg.get('track_label', 1) muon_label = processor_cfg.get('muon_label', 2) pion_label = processor_cfg.get('pion_label', 3) shower_label = processor_cfg.get('shower_label', 0) # Thresholds attached_threshold = processor_cfg.get('attached_threshold', 5) one_pixel = processor_cfg.get('ablation_eps', 5) ablation_radius = processor_cfg.get('ablation_radius', 15) ablation_min_samples = processor_cfg.get('ablation_min_samples', 5) shower_threshold = processor_cfg.get('shower_threshold', 10) fiducial_threshold = processor_cfg.get('fiducial_threshold', 30) muon_min_voxel_count = processor_cfg.get('muon_min_voxel_count', 30) matching_mode = processor_cfg.get('matching_mode', 'true_to_pred') # # ====== Initialization ====== # # Initialize analysis differently depending on data/MC setting start = time.time() if not data: predictor = FullChainEvaluator(data_blob, res, cfg, processor_cfg, deghosting=deghosting) else: predictor = FullChainPredictor(data_blob, res, cfg, processor_cfg, deghosting=deghosting) print("Evaluator took %d s" % (time.time() - start)) image_idxs = data_blob['index'] start = time.time() for i, index in enumerate(image_idxs): index_dict = { 'Index': index, 'run': data_blob['run_info'][i][0], 'subrun': data_blob['run_info'][i][1], 'event': data_blob['run_info'][i][2] } pred_particles = predictor.get_particles(i, only_primaries=False) # Match with true particles if available if not data: true_particles = predictor.get_true_particles(i, only_primaries=False) # Match true particles to predicted particles true_ids = np.array([p.id for p in true_particles]) matched_particles = predictor.match_particles(i, mode=matching_mode, min_overlap=5, overlap_mode='counts') for tp in true_particles: if tp.semantic_type != michel_label: continue #if not predictor.is_contained(tp.points, threshold=fiducial_threshold): continue p = data_blob['particles_asis'][i][tp.id] michel_is_attached_at_edge = False distance_to_muon = np.infty muon_ablation_cluster_count = -1 muon_ablation_delta = None for tp2 in true_particles: if tp2.semantic_type != track_label: continue if tp2.pid != muon_label and tp2.pid != pion_label: continue if tp2.size < muon_min_voxel_count: continue d = cdist(tp.points, tp2.points).min() attached_at_edge, cluster_count, old_cluster_count = is_attached_at_edge(tp.points, tp2.points, attached_threshold=attached_threshold, one_pixel=one_pixel, ablation_radius=ablation_radius, ablation_min_samples=ablation_min_samples, return_dbscan_cluster_count=True) if d < distance_to_muon: distance_to_muon = d muon_ablation_cluster_count = cluster_count muon_ablation_delta = old_cluster_count - cluster_count if not attached_at_edge: continue michel_is_attached_at_edge = True break # Determine whether it was matched is_matched = False for mp in matched_particles: # matching is done true to pred true_idx = 0 if matching_mode == 'true_to_pred' else 1 if mp[true_idx] is None or mp[true_idx].id != tp.id or mp[true_idx].volume != tp.volume: continue is_matched = True break # DBSCAN evaluation dbscan = DBSCAN(eps=one_pixel, min_samples=ablation_min_samples) michel_clusters = dbscan.fit(tp.points).labels_ # Record true Michel true_michels.append(OrderedDict({ 'index': index, 'true_num_pix': tp.size, 'true_sum_pix': tp.depositions.sum(), 'is_attached_at_edge': michel_is_attached_at_edge, 'muon_ablation_cluster_count': muon_ablation_cluster_count, 'muon_ablation_delta': muon_ablation_delta, 'is_matched': is_matched, 'energy_init': p.energy_init(), 'energy_deposit': p.energy_deposit(), 'num_voxels': p.num_voxels(), 'distance_to_muon': distance_to_muon, 'michel_cluster_count': len(np.unique(michel_clusters[michel_clusters>-1])), 'volume': tp.volume })) true_michels[-1].update(get_bounding_box(tp.points)) true_michels[-1].update(index_dict) # Loop over predicted particles for p in pred_particles: if p.semantic_type != michel_label: continue print("Found a predicted Michel particle!") #if not predictor.is_contained(p.points, threshold=fiducial_threshold): continue # Check whether it is attached to the edge of a track michel_is_attached_at_edge = False muon = None for p2 in pred_particles: if p2.semantic_type != track_label: continue if p2.size < muon_min_voxel_count: continue if not is_attached_at_edge(p.points, p2.points, attached_threshold=attached_threshold, one_pixel=one_pixel, ablation_radius=ablation_radius, ablation_min_samples=ablation_min_samples): continue michel_is_attached_at_edge = True muon = p2 break # Require that the Michel is attached at the edge of a track if not michel_is_attached_at_edge: continue print('... is attached at edge of a muon') # Look for shower-like particles nearby daughter_showers = [] distance_to_closest_shower = np.infty if shower_threshold > -1: for p2 in pred_particles: if p2.id == p.id: continue if p2.semantic_type != shower_label and p2.semantic_type != michel_label: continue distance_to_closest_shower = min(distance_to_closest_shower, cdist(p.points, p2.points).min()) if distance_to_closest_shower >= shower_threshold: continue daughter_showers.append(p2) daughter_num_pix = np.sum([daugher.size for daugher in daughter_showers]) daughter_sum_pix = np.sum([daughter.depositions.sum() for daughter in daughter_showers]) # Record muon endpoint endpoint = np.array([-1, -1, -1]) if cdist(p.points, [muon.startpoint]).min() > cdist(p.points, [muon.endpoint]).min(): if (muon.endpoint != np.array([-1, -1, -1])).any(): endpoint = muon.endpoint else: if (muon.startpoint != np.array([-1, -1, -1])).any(): endpoint = muon.startpoint #if (endpoint == np.array([-1, -1, -1])).all(): endpoint_id = cdist(p.points, muon.points).argmin() michel_id, muon_id = np.unravel_index(endpoint_id, (len(p.points), len(muon.points))) endpoint = muon.points[muon_id] # Find angle between Michel and muon muon_dir, michel_dir = find_cosmic_angle(muon, p, endpoint) # Heuristic to isolate primary ionization dbscan = DBSCAN(eps=one_pixel, min_samples=ablation_min_samples) clabels = dbscan.fit(p.points).labels_ pionization = clabels[michel_id] # cluster label of Michel point closest to the muon primary_ionization = clabels == pionization # Record distance to muon michel_to_muon_distance = cdist(p.points, muon.points).min() # Record calibrated depositions sum pred_sum_pix_calib = yz_calibrations(yz_calib, p.points, p.depositions) pred_primary_sum_pix_calib = yz_calibrations(yz_calib, p.points[primary_ionization], p.depositions[primary_ionization]) print("caibrating ", p.depositions.sum(), pred_sum_pix_calib) # Record candidate Michel update_dict = { 'index': index, 'particle_id': p.id, 'muon_id': muon.id, 'volume': p.volume, 'muon_size': muon.size, 'daughter_num_pix': daughter_num_pix, 'daughter_sum_pix': daughter_sum_pix, 'distance_to_closest_shower': distance_to_closest_shower, 'pred_num_pix': p.size, 'pred_sum_pix': p.depositions.sum(), 'true_num_pix': -1, 'true_sum_pix': -1, 'true_primary_num_pix': -1, 'true_primary_sum_pix': -1, 'pred_num_pix_true': -1, 'pred_sum_pix_true': -1, 'michel_true_energy_init': -1, 'michel_true_energy_deposit': -1, 'michel_true_num_voxels': -1, 'cluster_purity': -1, 'cluster_efficiency': -1, 'matched': False, 'true_pdg': -1, 'true_id': -1, 'true_semantic_type': -1, 'true_noghost_num_pix': -1, 'true_noghost_sum_pix': -1, 'true_noghost_primary_num_pix': -1, 'true_noghost_primary_sum_pix': -1, 'endpoint_x': endpoint[0], 'endpoint_y': endpoint[1], 'endpoint_z': endpoint[2], 'true_particle_track_id': -1, 'true_particle_pdg': -1, 'true_particle_parent_pdg': -1, 'true_particle_energy_init': -1, 'true_particle_energy_deposit': -1, 'true_particle_px': -1, 'true_particle_py': -1, 'true_particle_pz': -1, 'true_particle_t': -1, 'true_particle_parent_t': -1, 'muon_dir_x': muon_dir[0], 'muon_dir_y': muon_dir[1], 'muon_dir_z': muon_dir[2], 'michel_dir_x': michel_dir[0], 'michel_dir_y': michel_dir[1], 'michel_dir_z': michel_dir[2], 'muon_true_dir_x': -1, 'muon_true_dir_y': -1, 'muon_true_dir_z': -1, 'michel_true_dir_x': -1, 'michel_true_dir_y': -1, 'michel_true_dir_z': -1, 'pred_primary_num_pix': primary_ionization.sum(), 'pred_primary_sum_pix': p.depositions[primary_ionization].sum(), 'true_primary_sum_pix_ADC': -1, 'true_sum_pix_MeV': -1, 'distance_to_muon': michel_to_muon_distance, 'muon_length': compute_track_length(muon.points), 'pred_sum_pix_calib': pred_sum_pix_calib, 'true_sum_pix_calib': -1, 'pred_primary_sum_pix_calib': pred_primary_sum_pix_calib, 'true_primary_sum_pix_calib': -1 } update_dict.update(index_dict) #print("Heuristic primary ", update_dict['pred_num_pix'], update_dict['pred_primary_num_pix']) if not data: for mp in matched_particles: # matching is done true2pred true_idx = 0 if matching_mode == 'true_to_pred' else 1 pred_idx = 1 - true_idx if mp[pred_idx] is None or mp[pred_idx].id != p.id or mp[pred_idx].volume != p.volume: continue if mp[true_idx] is None: continue if mp[true_idx].volume != p.volume: continue m = mp[true_idx] pe = m.purity_efficiency(p) overlap_indices, mindices, _ = np.intersect1d(m.voxel_indices, p.voxel_indices, return_indices=True) truep = data_blob['particles_asis'][i][m.id] truemuon = None if truep.parent_id() in true_ids: truemuon = true_particles[np.where(true_ids == truep.parent_id())[0][0]] cluster_label_noghost = predictor.get_true_label(i, "group", schema="cluster_label_noghost", volume=p.volume) segment_label_noghost = predictor.get_true_label(i, "segment", schema="cluster_label_noghost", volume=p.volume) charge_label_noghost = predictor.get_true_label(i, "charge", schema="cluster_label_noghost", volume=p.volume) truecluster = cluster_label_noghost == m.id trueprimary = (cluster_label_noghost == m.id) & (segment_label_noghost == michel_label) muon_true_dir, michel_true_dir = find_true_cosmic_angle(truemuon, m, data_blob['particles_asis_voxels'][i]) #input_data = predictor.get_true_label(i, "charge", schema="input_data_rescaled", volume=p.volume) input_data = res['input_rescaled'][i*2+p.volume][:, 4] cluster_label_pred_noghost = predictor.get_true_label(i, "group", schema="cluster_label", volume=p.volume) segment_label_pred_noghost = predictor.get_true_label(i, "segment", schema="cluster_label", volume=p.volume) charge_label_pred_noghost = predictor.get_true_label(i, "charge", schema="cluster_label", volume=p.volume) truecluster_pred = cluster_label_pred_noghost == m.id trueprimary_pred = truecluster_pred & (segment_label_pred_noghost == michel_label) # Calibrate too true_sum_pix_calib = yz_calibrations(yz_calib, m.points, m.depositions) true_primary_sum_pix_calib = yz_calibrations(yz_calib, res['input_rescaled'][i*2+p.volume][trueprimary_pred, 1:4], input_data[trueprimary_pred]) update_dict.update({ 'matched': True, 'true_id': m.id, 'true_pdg': m.pid, 'true_particle_track_id': m.asis.track_id(), 'true_particle_pdg': m.asis.pdg_code(), 'true_particle_parent_pdg': m.asis.parent_pdg_code(), 'true_particle_energy_init': m.asis.energy_init(), 'true_particle_energy_deposit': m.asis.energy_deposit(), 'true_particle_px': m.asis.px(), 'true_particle_py': m.asis.py(), 'true_particle_pz': m.asis.pz(), 'true_particle_t': m.asis.t(), 'true_particle_parent_t': m.asis.parent_t(), 'true_semantic_type': m.semantic_type, 'cluster_purity': pe['purity'], 'cluster_efficiency': pe['efficiency'], # true_* uses the entire Michel if michel_primary_ionization_only if False # but otherwise, only primary ionization # using predicted deghosting mask 'true_num_pix': m.size, 'true_sum_pix': m.depositions.sum(), # in ADC 'true_sum_pix_MeV': charge_label_pred_noghost[truecluster_pred].sum(), # in MeV 'true_primary_num_pix': trueprimary_pred.sum(), 'true_primary_sum_pix': charge_label_pred_noghost[trueprimary_pred].sum(), # will be in MeV 'true_primary_sum_pix_ADC': input_data[trueprimary_pred].sum(), # in ADC 'pred_num_pix_true': len(overlap_indices), 'pred_sum_pix_true': m.depositions[mindices].sum(), 'michel_true_energy_init': truep.energy_init(), 'michel_true_energy_deposit': truep.energy_deposit(), 'michel_true_num_voxels': truep.num_voxels(), # cluster_label includes radiative photon in Michel true particle # so using segmentation labels to find primary ionization as well # voxel sum will be in MeV here. # true_noghost_* is using the true deghosting mask. 'true_noghost_num_pix': np.count_nonzero(truecluster), 'true_noghost_sum_pix': charge_label_noghost[truecluster].sum(), # in MeV 'true_noghost_primary_num_pix': np.count_nonzero(trueprimary), 'true_noghost_primary_sum_pix': charge_label_noghost[trueprimary].sum(), # in MeV 'muon_true_dir_x': muon_true_dir[0], 'muon_true_dir_y': muon_true_dir[1], 'muon_true_dir_z': muon_true_dir[2], 'michel_true_dir_x': michel_true_dir[0], 'michel_true_dir_y': michel_true_dir[1], 'michel_true_dir_z': michel_true_dir[2], 'true_sum_pix_calib': true_sum_pix_calib, 'true_primary_sum_pix_calib': true_primary_sum_pix_calib }) break update_dict.update(get_bounding_box(p.points)) michels.append(OrderedDict(update_dict)) print("Loop took %d s" % (time.time() - start)) return [michels, true_michels]