Source code for analysis.algorithms.selections.stopping_muons

from collections import OrderedDict
from turtle import update
from sklearn.decomposition import PCA

from analysis.classes.ui import FullChainEvaluator, FullChainPredictor
from analysis.decorator import evaluate
from mlreco.utils.gnn.evaluation import clustering_metrics
from mlreco.utils.gnn.cluster import get_cluster_label

from pprint import pprint
import time
import numpy as np
from scipy.spatial.distance import cdist


[docs]@evaluate(['stopping_muons_cells', 'stopping_muons_pred', 'stopping_muons_true'], mode='per_batch')
def stopping_muons(data_blob, res, data_idx, analysis_cfg, cfg):
    """
    Selection of stopping muons
    ===========================

    To convert dQ/dx from ADC/cm to MeV/cm. We want a sample as pure
    as possible, hence the option to enforce the presence of a Michel
    electron at the end of the muon.

    Configuration
    =============

    """
    muon_cells, muons, true_muons = [], [], []

    deghosting          = analysis_cfg['analysis']['deghosting']
    processor_cfg       = analysis_cfg['analysis'].get('processor_cfg', {})

    spatial_size        = processor_cfg['spatial_size']
    #selection_threshold = processor_cfg['selection_threshold']
    bin_size            = processor_cfg['bin_size']
    # Whether we are running on MC or data
    data                = processor_cfg.get('data', False)
    # Whether to restrict to tracks that are close to Michel voxels
    # threshold =-1 to disable, otherwise it is the threshold below which we consider the track
    # might be attached to a Michel electron.
    Michel_threshold    = processor_cfg.get('Michel_threshold', -1)
    # Whether to enforce PID constraint (predicted as muon only)
    pid_constraint      = processor_cfg.get('pid_constraint', False)
    # Avoid hardcoding labels
    muon_label       = processor_cfg.get('muon_label', 2)
    track_label      = processor_cfg.get('track_label', 1)
    fiducial_threshold = processor_cfg.get('fiducial_threshold', 30)
    #muon_min_voxel_count = processor_cfg.get('muon_min_voxel_count', 30)

    # Initialize analysis differently depending on data/MC setting
    if not data:
        predictor = FullChainEvaluator(data_blob, res, cfg, processor_cfg, deghosting=deghosting)
    else:
        predictor = FullChainPredictor(data_blob, res, cfg, processor_cfg, deghosting=deghosting)

    image_idxs = data_blob['index']

    # TODO check that 0 is actually drift direction
    # TODO check that 2 is actually vertical direction
    x, y, z = 0, 1, 2

    pca = PCA(n_components=2)

    for i, index in enumerate(image_idxs):
        index_dict = {
            'Index': index,
            'run': data_blob['run_info'][i][0],
            'subrun': data_blob['run_info'][i][1],
            'event': data_blob['run_info'][i][2]
        }
        print(index_dict)

        pred_particles = predictor.get_particles(i, only_primaries=False)

        # Match with true particles if available
        if not data:
            true_particles = predictor.get_true_particles(i, only_primaries=False)
            # Match true particles to predicted particles
            true_ids = np.array([p.id for p in true_particles])
            matched_particles = predictor.match_particles(i, mode='true_to_pred')

            # Quality Metrics
            # FIXME: put into Analysis tools UI ?
            clusts = res['particles'][i]
            true_group_ids = get_cluster_label(data_blob['cluster_label'][i], clusts, column=6)
            pred_group_ids = res['inter_group_pred'][i]
            ari, ami, sbd, pur, eff = clustering_metrics(clusts, true_group_ids, pred_group_ids)

            # Count true stopping muons in the event
            for tp in true_particles:
                if tp.semantic_type != track_label: continue
                #if not predictor.is_contained(tp.points, threshold=fiducial_threshold): continue
                num_voxels = tp.size

                p = data_blob['particles_asis_voxels'][i][tp.id]
                if p.pdg_code() not in [13, -13]: continue
                endpoint = p.end_position()
                #is_stopping = endpoint.x() >= 0 and endpoint.x() < spatial_size \
                #            and endpoint.y() >= 0 and endpoint.y() < spatial_size \
                #            and endpoint.z() >= 0 and endpoint.z() < spatial_size
                #is_stopping = predictor.is_contained(np.array([[endpoint.x(), endpoint.y(), endpoint.z()]]), threshold=fiducial_threshold)
                #if not is_stopping: continue
                # Determine whether attached to Michel or not
                attached_to_Michel = False
                Michel_size = -1
                distance_to_Michel = -1
                for daughter_p in true_particles:
                    if daughter_p.semantic_type != 2: continue
                    daughter = data_blob['particles_asis'][i][daughter_p.id]
                    # #if daughter.id() == p.id() or daughter.parent_id() != p.id(): continue
                    # if daughter.pdg_code() not in [11, -11]: continue
                    # #if 'Decay' not in daughter.creation_process(): continue
                    # d = np.sqrt((daughter.position().x() - endpoint.x())**2 + (daughter.position().y() - endpoint.y())**2 + (daughter.position().z() - endpoint.z())**2)
                    # print('   BIP   ', daughter.creation_process(), daughter.track_id(), daughter.parent_track_id(), daughter.ancestor_track_id(), p.track_id(), p.ancestor_track_id(), daughter.parent_id(), p.id())
                    # print('found electron decay ', d, daughter.creation_process())
                    # print(daughter.position().x(), daughter.first_step().x())
                    # print(daughter.position().y(), daughter.first_step().y())
                    # print(daughter.position().z(), daughter.first_step().z())
                    # if d >= Michel_threshold: continue
                    if p.id() != daughter.parent_id(): continue
                    # print('found Michel in true particles !!!')
                    attached_to_Michel = True
                    Michel_size = daughter_p.size
                    distance_to_Michel = cdist(tp.points, daughter_p.points).min()
                    break
                # Determine whether it was matched
                is_matched = False
                for mp in matched_particles: # matching is done true to pred
                    if mp[0] is None or mp[0].id != tp.id: continue
                    is_matched = True
                    break

                true_muons.append(OrderedDict({
                    'index': index,
                    'attached_to_Michel': attached_to_Michel,
                    'distance_to_michel': distance_to_Michel,
                    'pdg': p.pdg_code(),
                    'num_voxels': num_voxels,
                    'Michel_num_voxels': Michel_size,
                    'is_matched': is_matched,
                    'overall_purity': pur,
                    'overall_efficiency': eff,
                    'overall_ari': ari,
                    'volume': tp.volume,
                    'endpoint_x': endpoint.x(),
                    'endpoint_y': endpoint.y(),
                    'endpoint_z': endpoint.z()
                }))
                true_muons[-1].update(index_dict)

        # Loop over predicted particles
        for p in pred_particles:
            if p.semantic_type != track_label: continue
            #if not predictor.is_contained(p.points, threshold=fiducial_threshold): continue
            coords = p.points

            # Check for presence of Michel electron
            attached_to_Michel = False
            closest_point = None
            for p2 in pred_particles:
                if p2.semantic_type != 2: continue
                d =  cdist(p.points, p2.points)
                if d.min() >= Michel_threshold: continue
                attached_to_Michel = True
                closest_point = d.min(axis=1).argmin()

            if not attached_to_Michel: continue

            # If asked to check predicted PID, exclude non-predicted-muons
            if pid_constraint and p.pid != muon_label: continue

            # PCA to get a rough direction
            coords_pca = pca.fit_transform(p.points)[:, 0]
            # Make sure where the end vs start is
            # if end == 0 we have the right bin ordering, otherwise might need to flip when we record the residual range
            distances_endpoints = [((coords[coords_pca.argmin(), :] - coords[closest_point, :])**2).sum(), ((coords[coords_pca.argmax(), :] - coords[closest_point, :])**2).sum()]
            end = np.argmin(distances_endpoints)

            # Record the stopping muon
            update_dict = {
                'index': index,
                'pred_particle_type': p.pid,
                'pred_particle_is_primary': p.is_primary,
                'pred_particle_size': p.size,
                #'projected_x_length': projected_x_length,
                'theta_yz': np.arctan2((coords[:, y].max() - coords[:, y].min()),(coords[:, z].max()-coords[:, z].min())),
                'theta_xz': np.arctan2((coords[:, x].max() - coords[:, x].min()),(coords[:, z].max()-coords[:, z].min())),
                'delta_x': coords[:, x].max() - coords[:, x].min(),
                'delta_y': coords[:, y].max() - coords[:, y].min(),
                'delta_z': coords[:, z].max() - coords[:, z].min(),
                'matched': False,
                'pca_length': coords_pca.max() - coords_pca.min(),
                #'t0': t0,
                'true_pdg': -1,
                'true_size': -1,
                'cluster_purity': -1,
                'cluster_efficiency': -1,
                'distance_endpoint_to_michel': np.sqrt(np.min(distances_endpoints)),
                'volume': p.volume,
                'endpoint_x': coords[closest_point, 0],
                'endpoint_y': coords[closest_point, 1],
                'endpoint_z': coords[closest_point, 2]
                }
            if not data:
                for mp in matched_particles: # matching is done true2pred
                    if mp[1] is None or mp[1].id != p.id: continue
                    if mp[0] is None: continue
                    m = mp[0]
                    pe = m.purity_efficiency(p)
                    update_dict.update({
                        'matched': True,
                        'true_pdg': m.pid,
                        'true_size': m.size,
                        'cluster_purity': pe['purity'],
                        'cluster_efficiency': pe['efficiency']
                    })

            update_dict.update(index_dict)
            muons.append(OrderedDict(update_dict))
            track_dict= update_dict

            # Split into segments and compute local dQ/dx
            if end != 0: # Invert points along PCA
                coords_pca = coords_pca.max() - coords_pca

            bins = np.arange(coords_pca.min(), coords_pca.max(), bin_size)
            # bin_inds takes values in [1, len(bins)]
            bin_inds = np.digitize(coords_pca, bins)

            # spatial_bins = np.arange(0, spatial_size, spatial_bin_size)
            # y_inds = np.digitize(coords[:, y], bins)
            # z_inds = np.digitize(coords[:, z], bins)
            # x_inds = np.digitize(coords[:, x], bins)
            for i in np.unique(bin_inds):
                mask = bin_inds == i
                if np.count_nonzero(mask) < 2: continue
                # Repeat PCA locally for better measurement of dx
                pca_axis = pca.fit_transform(p.points[mask])
                dx = pca_axis[:, 0].max() - pca_axis[:, 0].min()
                update_dict = OrderedDict({
                    'index': index,
                    'cell_dQ': p.depositions[mask].sum(),
                    'cell_dN':  np.count_nonzero(mask),
                    'cell_dx': dx,
                    'cell_bin': i,
                    'cell_residual_range': (i - 0.5) * bin_size,
                    'nbins': len(bins),
                    'volume': p.volume,
                    'cell_delta_x': p.points[mask, x].max() - p.points[mask, x].min(),
                    'cell_delta_y': p.points[mask, y].max() - p.points[mask, y].min(),
                    'cell_delta_z': p.points[mask, z].max() - p.points[mask, z].min(),
                    'cell_min_x': p.points[mask, x].min(),
                    'cell_min_y': p.points[mask, y].min(),
                    'cell_min_z': p.points[mask, z].min(),
                    'cell_max_x': p.points[mask, x].max(),
                    'cell_max_y': p.points[mask, y].max(),
                    'cell_max_z': p.points[mask, z].max(),
                })
                update_dict.update(track_dict)
                update_dict.update(index_dict)
                muon_cells.append(update_dict)

    return [muon_cells, muons, true_muons]
lartpc_mlreco3d documentation

Source code for analysis.algorithms.selections.stopping_muons