Source code for analysis.algorithms.selections.michel_electrons

from collections import OrderedDict
from turtle import update
from sklearn.decomposition import PCA

from analysis.classes.ui import FullChainEvaluator, FullChainPredictor
from analysis.decorator import evaluate
from analysis.algorithms.calorimetry import compute_track_length, compute_particle_direction

from pprint import pprint
import time
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.cluster import DBSCAN
import pandas as pd


[docs]def yz_calibrations(yz_calib, points, edep, bin_size=10, voxel_size=0.3, spatial_size=6144):
    """
    Apply YZ calibration factors from CSV file.

    Parameters
    ==========
    yz_calib: pd.DataFrame
    points: (N, 3) np.ndarray
    edep: (N,) np.ndarray
    bin_size: float, in cm
    voxel_size: float, in cm
    spatial_size: int, in voxels

    Returns
    =======
    float
        calibrated sum of edep
    """
    if yz_calib is None:
        return edep.sum()

    xbins = np.arange(0, spatial_size, bin_size/voxel_size)
    ybins = np.arange(0, spatial_size, bin_size/voxel_size)
    zbins = np.arange(0, spatial_size, bin_size/voxel_size)
    xidx = np.digitize(points[:, 0], xbins)
    yidx = np.digitize(points[:, 1], ybins)
    zidx = np.digitize(points[:, 2], zbins)
    df = pd.DataFrame({'x': points[:, 0], 'y': points[:, 1], 'z': points[:, 2], 'edep': edep})
    df['xbin'] = xidx
    df['ybin'] = yidx
    df['zbin'] = zidx
    df['tpc'] = 'EE'
    df.loc[(df['xbin'] > 40) & (df['xbin'] <= 83), 'tpc'] = 'EW'
    df.loc[(df['xbin'] > 83) & (df['xbin'] <= 125), 'tpc'] = 'WE'
    df.loc[df['xbin'] > 125, 'tpc'] = 'WW'
    df = pd.merge(df, yz_calib, on=['tpc', 'ybin', 'zbin'], how='left')
    df.loc[np.isnan(df['scale']), 'scale'] = 1.
    return (df['edep'] * df['scale']).sum()

[docs]def get_bounding_box(points):
    return {
            "points_min_x": points[:, 0].min(),
            "points_max_x": points[:, 0].max(),
            "points_min_y": points[:, 1].min(),
            "points_max_y": points[:, 1].max(),
            "points_min_z": points[:, 2].min(),
            "points_max_z": points[:, 2].max()
    }

[docs]def is_attached_at_edge(points1, points2, attached_threshold=5,
                        one_pixel=5, ablation_radius=15, ablation_min_samples=5,
                        return_dbscan_cluster_count=False):
    distances = cdist(points1, points2)
    is_attached = np.min(distances) < attached_threshold
    # check for the edge now
    Michel_min, MIP_min = np.unravel_index(np.argmin(distances), distances.shape)
    min_coords = points2[MIP_min, :]
    ablated_cluster = points2[np.linalg.norm(points2-min_coords, axis=1) > ablation_radius]
    new_cluster_count, old_cluster_count = 0, 1
    if ablated_cluster.shape[0] > 0:
        dbscan = DBSCAN(eps=one_pixel, min_samples=ablation_min_samples)
        old_cluster = dbscan.fit(points2).labels_
        new_cluster = dbscan.fit(ablated_cluster).labels_
        # If only one cluster is left, we were at the edge
        # Account for old cluster count in case track is fragmented
        # and put together by Track GNN
        old_cluster_count = len(np.unique(old_cluster[old_cluster>-1]))
        new_cluster_count =  len(np.unique(new_cluster[new_cluster>-1]))
        is_edge = (old_cluster_count - new_cluster_count) <= 1 and old_cluster_count >= new_cluster_count
    else: # if nothing is left after ablating, this is a really small muon... calling it the edge
        is_edge = True

    if return_dbscan_cluster_count:
        return is_attached and is_edge, new_cluster_count, old_cluster_count

    return is_attached and is_edge

[docs]def find_cosmic_angle(muon, michel, endpoint, radius=30):
    """
    Parameters
    ==========
    muon: Particle
    michel: Particle
    """
    # Find muon end direction
    pca = PCA(n_components=2)
    neighborhood = (cdist(muon.points, [endpoint]) < radius).reshape((-1,))
    if neighborhood.sum() < 3:
        return -1 * np.ones((3,)), -1. * np.ones((3,))
    coords = pca.fit_transform(muon.points[neighborhood])
    muon_dir = pca.components_[0, :]
    # Make sure muon direction is in the right sense
    far_point = muon.points[neighborhood][cdist(muon.points[neighborhood], [endpoint]).argmax()]
    vec = endpoint - far_point
    if np.dot(vec, muon_dir) < 0:
        muon_dir *= -1
    # Find Michel start direction
    pca = PCA(n_components=2)
    neighborhood = (cdist(michel.points, [endpoint]) < radius).reshape((-1,))
    if neighborhood.sum() < 3:
        return -1 * np.ones((3,)), -1. * np.ones((3,))
    coords = pca.fit_transform(michel.points[neighborhood])
    michel_dir = pca.components_[0, :]
    # Make sure Michel direction is in the right sense
    far_point = michel.points[neighborhood][cdist(michel.points[neighborhood], [endpoint]).argmax()]
    vec = far_point - endpoint
    if np.dot(vec, michel_dir) < 0:
        michel_dir *= -1

    return muon_dir, michel_dir

[docs]def find_true_cosmic_angle(muon, michel, particles_asis_voxels, radius=30):
    #true_michel, true_muon = None, None
    #for p in particles_asis_voxels:
    #    if p.id() == muon.id:
    #        true_muon = p
    #    if p.id() == michel.id:
    #        true_michel = p
    if michel is None or muon is None:
        return -1 * np.ones((3,)), -1 * np.ones((3,))
    #endpoint = [true_michel.x(), true_michel.y(), true_michel.z()]
    #endpoint = [true_muon.end_position().x(), true_muon.end_position().y(), true_muon.end_position().z()]
    endpoint_id = cdist(michel.points, muon.points).argmin()
    michel_id, muon_id = np.unravel_index(endpoint_id, (len(michel.points), len(muon.points)))
    endpoint = muon.points[muon_id]
    return find_cosmic_angle(muon, michel, endpoint, radius=radius)

[docs]@evaluate(['michels_pred', 'michels_true'], mode='per_batch')
def michel_electrons(data_blob, res, data_idx, analysis_cfg, cfg):
    """
    Selection of Michel electrons
    =============================


    Configuration
    =============
    Under `processor_cfg`, you can specify the following parameters:


    Output
    ======
    """
    try:
        yz_calib = pd.read_csv("/sdf/group/neutrino/ldomine/ICARUS_Calibrations/v09_62_00/tpc_yz_correction_data.csv")
    except Exception as e:
        print("Unable to load YZ calibration csv.")
        print(e)
        yz_calib = None
    #
    # ====== Configuration ======
    #
    michels, true_michels = [], []
    deghosting = analysis_cfg['analysis']['deghosting']

    processor_cfg       = analysis_cfg['analysis'].get('processor_cfg', {})
    spatial_size        = processor_cfg['spatial_size']
    # Whether we are running on MC or data
    data           = processor_cfg.get('data', False)

    # Avoid hardcoding labels
    michel_label       = processor_cfg.get('michel_label', 2)
    track_label        = processor_cfg.get('track_label', 1)
    muon_label         = processor_cfg.get('muon_label', 2)
    pion_label         = processor_cfg.get('pion_label', 3)
    shower_label       = processor_cfg.get('shower_label', 0)

    # Thresholds
    attached_threshold = processor_cfg.get('attached_threshold', 5)
    one_pixel          = processor_cfg.get('ablation_eps', 5)
    ablation_radius    = processor_cfg.get('ablation_radius', 15)
    ablation_min_samples = processor_cfg.get('ablation_min_samples', 5)
    shower_threshold   = processor_cfg.get('shower_threshold', 10)
    fiducial_threshold = processor_cfg.get('fiducial_threshold', 30)
    muon_min_voxel_count = processor_cfg.get('muon_min_voxel_count', 30)
    matching_mode      = processor_cfg.get('matching_mode', 'true_to_pred')
    #
    # ====== Initialization ======
    #
    # Initialize analysis differently depending on data/MC setting
    start = time.time()
    if not data:
        predictor = FullChainEvaluator(data_blob, res, cfg, processor_cfg, deghosting=deghosting)
    else:
        predictor = FullChainPredictor(data_blob, res, cfg, processor_cfg, deghosting=deghosting)
    print("Evaluator took %d s" % (time.time() - start))
    image_idxs = data_blob['index']


    start = time.time()
    for i, index in enumerate(image_idxs):
        index_dict = {
            'Index': index,
            'run': data_blob['run_info'][i][0],
            'subrun': data_blob['run_info'][i][1],
            'event': data_blob['run_info'][i][2]
        }
        pred_particles = predictor.get_particles(i, only_primaries=False)

        # Match with true particles if available
        if not data:
            true_particles = predictor.get_true_particles(i, only_primaries=False)
            # Match true particles to predicted particles
            true_ids = np.array([p.id for p in true_particles])
            matched_particles = predictor.match_particles(i, mode=matching_mode, min_overlap=5, overlap_mode='counts')

            for tp in true_particles:
                if tp.semantic_type != michel_label: continue
                #if not predictor.is_contained(tp.points, threshold=fiducial_threshold): continue
                p = data_blob['particles_asis'][i][tp.id]

                michel_is_attached_at_edge = False
                distance_to_muon = np.infty
                muon_ablation_cluster_count = -1
                muon_ablation_delta = None
                for tp2 in true_particles:
                    if tp2.semantic_type != track_label: continue
                    if tp2.pid != muon_label and tp2.pid != pion_label: continue
                    if tp2.size < muon_min_voxel_count: continue
                    d = cdist(tp.points, tp2.points).min()
                    attached_at_edge, cluster_count, old_cluster_count = is_attached_at_edge(tp.points, tp2.points,
                                                                    attached_threshold=attached_threshold,
                                                                    one_pixel=one_pixel,
                                                                    ablation_radius=ablation_radius,
                                                                    ablation_min_samples=ablation_min_samples,
                                                                    return_dbscan_cluster_count=True)
                    if d < distance_to_muon:
                        distance_to_muon = d
                        muon_ablation_cluster_count = cluster_count
                        muon_ablation_delta = old_cluster_count - cluster_count
                    if not attached_at_edge: continue
                    michel_is_attached_at_edge = True
                    break

                # Determine whether it was matched
                is_matched = False
                for mp in matched_particles: # matching is done true to pred
                    true_idx = 0 if matching_mode == 'true_to_pred' else 1
                    if mp[true_idx] is None or mp[true_idx].id != tp.id or mp[true_idx].volume != tp.volume: continue
                    is_matched = True
                    break

                # DBSCAN evaluation
                dbscan = DBSCAN(eps=one_pixel, min_samples=ablation_min_samples)
                michel_clusters = dbscan.fit(tp.points).labels_
                # Record true Michel
                true_michels.append(OrderedDict({
                    'index': index,
                    'true_num_pix': tp.size,
                    'true_sum_pix': tp.depositions.sum(),
                    'is_attached_at_edge': michel_is_attached_at_edge,
                    'muon_ablation_cluster_count': muon_ablation_cluster_count,
                    'muon_ablation_delta': muon_ablation_delta,
                    'is_matched': is_matched,
                    'energy_init': p.energy_init(),
                    'energy_deposit': p.energy_deposit(),
                    'num_voxels': p.num_voxels(),
                    'distance_to_muon': distance_to_muon,
                    'michel_cluster_count': len(np.unique(michel_clusters[michel_clusters>-1])),
                    'volume': tp.volume
                }))
                true_michels[-1].update(get_bounding_box(tp.points))
                true_michels[-1].update(index_dict)

        # Loop over predicted particles
        for p in pred_particles:
            if p.semantic_type != michel_label: continue
            print("Found a predicted Michel particle!")
            #if not predictor.is_contained(p.points, threshold=fiducial_threshold): continue
            # Check whether it is attached to the edge of a track
            michel_is_attached_at_edge = False
            muon = None
            for p2 in pred_particles:
                if p2.semantic_type != track_label: continue
                if p2.size < muon_min_voxel_count: continue
                if not is_attached_at_edge(p.points, p2.points,
                                        attached_threshold=attached_threshold,
                                        one_pixel=one_pixel,
                                        ablation_radius=ablation_radius,
                                        ablation_min_samples=ablation_min_samples): continue
                michel_is_attached_at_edge = True
                muon = p2
                break

            # Require that the Michel is attached at the edge of a track
            if not michel_is_attached_at_edge: continue
            print('... is attached at edge of a muon')

            # Look for shower-like particles nearby
            daughter_showers = []
            distance_to_closest_shower = np.infty
            if shower_threshold > -1:
                for p2 in pred_particles:
                    if p2.id == p.id: continue
                    if p2.semantic_type != shower_label and p2.semantic_type != michel_label: continue
                    distance_to_closest_shower = min(distance_to_closest_shower, cdist(p.points, p2.points).min())
                    if distance_to_closest_shower >= shower_threshold: continue
                    daughter_showers.append(p2)
            daughter_num_pix = np.sum([daugher.size for daugher in daughter_showers])
            daughter_sum_pix = np.sum([daughter.depositions.sum() for daughter in daughter_showers])

            # Record muon endpoint
            endpoint = np.array([-1, -1, -1])
            if cdist(p.points, [muon.startpoint]).min() > cdist(p.points, [muon.endpoint]).min():
                if (muon.endpoint != np.array([-1, -1, -1])).any():
                    endpoint = muon.endpoint
            else:
                if (muon.startpoint != np.array([-1, -1, -1])).any():
                    endpoint = muon.startpoint
            #if (endpoint == np.array([-1, -1, -1])).all():
            endpoint_id = cdist(p.points, muon.points).argmin()
            michel_id, muon_id = np.unravel_index(endpoint_id, (len(p.points), len(muon.points)))
            endpoint = muon.points[muon_id]

            # Find angle between Michel and muon
            muon_dir, michel_dir = find_cosmic_angle(muon, p, endpoint)

            # Heuristic to isolate primary ionization
            dbscan = DBSCAN(eps=one_pixel, min_samples=ablation_min_samples)
            clabels = dbscan.fit(p.points).labels_
            pionization = clabels[michel_id] # cluster label of Michel point closest to the muon
            primary_ionization = clabels == pionization

            # Record distance to muon
            michel_to_muon_distance = cdist(p.points, muon.points).min()

            # Record calibrated depositions sum
            pred_sum_pix_calib = yz_calibrations(yz_calib, p.points, p.depositions)
            pred_primary_sum_pix_calib = yz_calibrations(yz_calib, p.points[primary_ionization], p.depositions[primary_ionization])
            print("caibrating ", p.depositions.sum(), pred_sum_pix_calib)

            # Record candidate Michel
            update_dict = {
                'index': index,
                'particle_id': p.id,
                'muon_id': muon.id,
                'volume': p.volume,
                'muon_size': muon.size,
                'daughter_num_pix': daughter_num_pix,
                'daughter_sum_pix': daughter_sum_pix,
                'distance_to_closest_shower': distance_to_closest_shower,
                'pred_num_pix': p.size,
                'pred_sum_pix': p.depositions.sum(),
                'true_num_pix': -1,
                'true_sum_pix': -1,
                'true_primary_num_pix': -1,
                'true_primary_sum_pix': -1,
                'pred_num_pix_true': -1,
                'pred_sum_pix_true': -1,
                'michel_true_energy_init': -1,
                'michel_true_energy_deposit': -1,
                'michel_true_num_voxels': -1,
                'cluster_purity': -1,
                'cluster_efficiency': -1,
                'matched': False,
                'true_pdg': -1,
                'true_id': -1,
                'true_semantic_type': -1,
                'true_noghost_num_pix': -1,
                'true_noghost_sum_pix': -1,
                'true_noghost_primary_num_pix': -1,
                'true_noghost_primary_sum_pix': -1,
                'endpoint_x': endpoint[0],
                'endpoint_y': endpoint[1],
                'endpoint_z': endpoint[2],
                'true_particle_track_id': -1,
                'true_particle_pdg': -1,
                'true_particle_parent_pdg': -1,
                'true_particle_energy_init': -1,
                'true_particle_energy_deposit': -1,
                'true_particle_px': -1,
                'true_particle_py': -1,
                'true_particle_pz': -1,
                'true_particle_t': -1,
                'true_particle_parent_t': -1,
                'muon_dir_x': muon_dir[0],
                'muon_dir_y': muon_dir[1],
                'muon_dir_z': muon_dir[2],
                'michel_dir_x': michel_dir[0],
                'michel_dir_y': michel_dir[1],
                'michel_dir_z': michel_dir[2],
                'muon_true_dir_x': -1,
                'muon_true_dir_y': -1,
                'muon_true_dir_z': -1,
                'michel_true_dir_x': -1,
                'michel_true_dir_y': -1,
                'michel_true_dir_z': -1,
                'pred_primary_num_pix': primary_ionization.sum(),
                'pred_primary_sum_pix': p.depositions[primary_ionization].sum(),
                'true_primary_sum_pix_ADC': -1,
                'true_sum_pix_MeV': -1,
                'distance_to_muon': michel_to_muon_distance,
                'muon_length': compute_track_length(muon.points),
                'pred_sum_pix_calib': pred_sum_pix_calib,
                'true_sum_pix_calib': -1,
                'pred_primary_sum_pix_calib': pred_primary_sum_pix_calib,
                'true_primary_sum_pix_calib': -1
            }
            update_dict.update(index_dict)
            #print("Heuristic primary ", update_dict['pred_num_pix'], update_dict['pred_primary_num_pix'])

            if not data:
                for mp in matched_particles: # matching is done true2pred
                    true_idx = 0 if matching_mode == 'true_to_pred' else 1
                    pred_idx = 1 - true_idx
                    if mp[pred_idx] is None or mp[pred_idx].id != p.id or mp[pred_idx].volume != p.volume: continue
                    if mp[true_idx] is None: continue
                    if mp[true_idx].volume != p.volume: continue
                    m = mp[true_idx]
                    pe = m.purity_efficiency(p)
                    overlap_indices, mindices, _ = np.intersect1d(m.voxel_indices, p.voxel_indices, return_indices=True)
                    truep = data_blob['particles_asis'][i][m.id]
                    truemuon = None
                    if truep.parent_id() in true_ids:
                        truemuon = true_particles[np.where(true_ids == truep.parent_id())[0][0]]

                    cluster_label_noghost = predictor.get_true_label(i, "group", schema="cluster_label_noghost", volume=p.volume)
                    segment_label_noghost = predictor.get_true_label(i, "segment", schema="cluster_label_noghost", volume=p.volume)
                    charge_label_noghost = predictor.get_true_label(i, "charge", schema="cluster_label_noghost", volume=p.volume)

                    truecluster = cluster_label_noghost == m.id
                    trueprimary = (cluster_label_noghost == m.id) & (segment_label_noghost == michel_label)
                    muon_true_dir, michel_true_dir = find_true_cosmic_angle(truemuon, m, data_blob['particles_asis_voxels'][i])

                    #input_data = predictor.get_true_label(i, "charge", schema="input_data_rescaled", volume=p.volume)
                    input_data = res['input_rescaled'][i*2+p.volume][:, 4]
                    cluster_label_pred_noghost = predictor.get_true_label(i, "group", schema="cluster_label", volume=p.volume)
                    segment_label_pred_noghost = predictor.get_true_label(i, "segment", schema="cluster_label", volume=p.volume)
                    charge_label_pred_noghost = predictor.get_true_label(i, "charge", schema="cluster_label", volume=p.volume)
                    truecluster_pred = cluster_label_pred_noghost == m.id
                    trueprimary_pred = truecluster_pred & (segment_label_pred_noghost == michel_label)

                    # Calibrate too
                    true_sum_pix_calib = yz_calibrations(yz_calib, m.points, m.depositions)
                    true_primary_sum_pix_calib = yz_calibrations(yz_calib, res['input_rescaled'][i*2+p.volume][trueprimary_pred, 1:4], input_data[trueprimary_pred])
                    update_dict.update({
                        'matched': True,
                        'true_id': m.id,
                        'true_pdg': m.pid,
                        'true_particle_track_id': m.asis.track_id(),
                        'true_particle_pdg': m.asis.pdg_code(),
                        'true_particle_parent_pdg': m.asis.parent_pdg_code(),
                        'true_particle_energy_init': m.asis.energy_init(),
                        'true_particle_energy_deposit': m.asis.energy_deposit(),
                        'true_particle_px': m.asis.px(),
                        'true_particle_py': m.asis.py(),
                        'true_particle_pz': m.asis.pz(),
                        'true_particle_t': m.asis.t(),
                        'true_particle_parent_t': m.asis.parent_t(),
                        'true_semantic_type': m.semantic_type,
                        'cluster_purity': pe['purity'],
                        'cluster_efficiency': pe['efficiency'],
                        # true_* uses the entire Michel if michel_primary_ionization_only if False
                        # but otherwise, only primary ionization
                        # using predicted deghosting mask
                        'true_num_pix': m.size,
                        'true_sum_pix': m.depositions.sum(), # in ADC
                        'true_sum_pix_MeV': charge_label_pred_noghost[truecluster_pred].sum(), # in MeV
                        'true_primary_num_pix': trueprimary_pred.sum(),
                        'true_primary_sum_pix': charge_label_pred_noghost[trueprimary_pred].sum(), # will be in MeV
                        'true_primary_sum_pix_ADC': input_data[trueprimary_pred].sum(), # in ADC
                        'pred_num_pix_true': len(overlap_indices),
                        'pred_sum_pix_true': m.depositions[mindices].sum(),
                        'michel_true_energy_init': truep.energy_init(),
                        'michel_true_energy_deposit': truep.energy_deposit(),
                        'michel_true_num_voxels': truep.num_voxels(),
                        # cluster_label includes radiative photon in Michel true particle
                        # so using segmentation labels to find primary ionization as well
                        # voxel sum will be in MeV here.
                        # true_noghost_* is using the true deghosting mask.
                        'true_noghost_num_pix': np.count_nonzero(truecluster),
                        'true_noghost_sum_pix': charge_label_noghost[truecluster].sum(), # in MeV
                        'true_noghost_primary_num_pix': np.count_nonzero(trueprimary),
                        'true_noghost_primary_sum_pix': charge_label_noghost[trueprimary].sum(), # in MeV
                        'muon_true_dir_x': muon_true_dir[0],
                        'muon_true_dir_y': muon_true_dir[1],
                        'muon_true_dir_z': muon_true_dir[2],
                        'michel_true_dir_x': michel_true_dir[0],
                        'michel_true_dir_y': michel_true_dir[1],
                        'michel_true_dir_z': michel_true_dir[2],
                        'true_sum_pix_calib': true_sum_pix_calib,
                        'true_primary_sum_pix_calib': true_primary_sum_pix_calib
                    })
                    break

            update_dict.update(get_bounding_box(p.points))
            michels.append(OrderedDict(update_dict))
    print("Loop took %d s" % (time.time() - start))

    return [michels, true_michels]
lartpc_mlreco3d documentation

Source code for analysis.algorithms.selections.michel_electrons