Source code for mlreco.post_processing.analysis.muon_residual_range

import numpy as np
import pandas as pd
from mlreco.post_processing import post_processing
from mlreco.utils.gnn.cluster import get_cluster_label, get_cluster_batch
from mlreco.utils.ppn import uresnet_ppn_type_point_selector


[docs]def get_fragments(input_data, fragments, step=16, track_endpoints=None,
                spatial_size=768, fiducial=33, min_length=200, neighborhood=21):
    from sklearn.decomposition import PCA
    pca = PCA(n_components=3)

    local_fragments = []
    residual_range = []
    endpoints = []
    cluster_id = []
    for idx, f in enumerate(fragments):
        coords = input_data[f, 1:4]
        new_coords = pca.fit_transform(coords)
        new_coords[:, 1:] = 0.

        #
        # Require a minimum length
        #
        if new_coords[:, 0].max() - new_coords[:, 0].min() < min_length:
            continue

        #
        # This block makes sure we are starting from the endpoint.
        # We also recompute the residual range later to use dx estimations.
        #
        endpoint = None
        if track_endpoints is not None and len(track_endpoints[idx]) > 0:
            from scipy.spatial.distance import cdist
            distances = cdist(track_endpoints[idx][:, 1:4], coords)

            # Pick the one with the highest dQ in the neighborhood
            neighborhood_dQs = []
            for pt_idx, point in enumerate(track_endpoints[idx]):
                dQ = input_data[f, 4][distances[pt_idx, :] < neighborhood].sum()
                neighborhood_dQs.append(dQ)
            #print('neighborhood dQ nearby: ', neighborhood_dQs)
            endpoint_idx = np.argmax(neighborhood_dQs)
            pt_idx = np.argmin(distances[endpoint_idx, :])
            endpoint = new_coords[pt_idx, 0]
            real_endpoint = track_endpoints[idx][endpoint_idx, 1:4]
            print('picked', real_endpoint, (np.abs(real_endpoint) < fiducial).any() or (np.abs(spatial_size - real_endpoint) < fiducial).any())
        # If None could be found, it probably means
        # that the track is exiting the volume.
        # If it was found but it is too close to the
        # boundaries, we also skip it.
        if endpoint is None or (np.abs(real_endpoint) < fiducial).any() or (np.abs(spatial_size - real_endpoint) < fiducial).any():
            continue

        # Now we split the track into smaller fragments
        # of size `step` along the main PCA axis
        n = np.ceil((np.ceil(new_coords[:, 0].max()) - np.floor(new_coords[:, 0].min()))/step)
        bins = np.digitize(new_coords[:, 0], np.linspace(np.floor(new_coords[:, 0].min()), np.ceil(new_coords[:, 0].max()), int(n)))
        for b in np.unique(bins):
            local_fragments.append(f[bins == b])
            #middle_pt = (new_coords[bins == b, 0].min() + new_coords[bins == b, 0].max()) / 2.
            middle_pt = new_coords[bins == b, 0].min()
            residual_range.append(np.abs(middle_pt - endpoint))
            endpoints.append(real_endpoint)
            cluster_id.append(idx)

    if len(endpoints):
        endpoints = np.stack(endpoints, axis=0)

    return local_fragments, residual_range, endpoints, cluster_id


[docs]def compute_metrics(local_fragments, cluster_id, residual_range, input_data, endpoints, min_voxels=3):
    from sklearn.decomposition import PCA
    pca = PCA(n_components=3)

    metrics = {
        "dQdx": [],
        "rough_residual_range": [],
        "dN": [],
        "dQ": [],
        "dx": []
    }

    final_fragments, final_endpoints, final_cluster_id = [], [], []
    for idx, f in enumerate(local_fragments):
        if len(f) < min_voxels:
            continue
        coords = input_data[f, 1:4]
        new_coords = pca.fit_transform(coords)
        dx = new_coords[:, 0].max() - new_coords[:, 0].min()
        dQ = input_data[f, 4].sum()
        metrics["dQdx"].append(dQ/dx)
        metrics["rough_residual_range"].append(residual_range[idx])
        metrics["dN"].append(len(f))
        metrics["dQ"].append(dQ)
        metrics["dx"].append(dx)
        final_fragments.append(f)
        final_endpoints.append(endpoints[idx])
        final_cluster_id.append(cluster_id[idx])
    if len(final_endpoints):
        final_endpoints = np.stack(final_endpoints, axis=0)
    return pd.DataFrame(metrics), final_fragments, final_endpoints, np.array(final_cluster_id)


[docs]def compute_residual_range(input_data, final_cluster_ids, metrics, final_fragments, endpoints,
                          spatial_size=768, fiducial=33, recompute_dx=False):
    from scipy.spatial.distance import cdist
    from sklearn.decomposition import PCA
    pca = PCA(n_components=3)

    #true_cluster_ids = get_cluster_label(clust_label, final_fragments, column=6)
    fragments_batch_ids = get_cluster_batch(input_data, final_fragments)

    final_fragments = np.array([np.array(f, dtype=int) for f in final_fragments], dtype=object)
    residual_range = - np.ones((len(final_fragments),))
    new_dx = - np.ones((len(final_fragments),))
    # Loop over events in current batch
    for b in np.unique(fragments_batch_ids):
        batch_mask = fragments_batch_ids == b
        # Loop over track clusters in this event
        for idx in np.unique(final_cluster_ids[batch_mask]):
            track_fragments = final_fragments[batch_mask][final_cluster_ids[batch_mask] == idx]
            endpoint = endpoints[batch_mask][final_cluster_ids[batch_mask] == idx][0]

            # Now order fragments per distance to endpoint
            # We want to start from the fragment closest to the endpoint
            # and go from there.
            init_perm = np.argsort(metrics["rough_residual_range"][batch_mask][final_cluster_ids[batch_mask] == idx].values)
            order = []
            total_d = 0 # Keep track of total distance by accumulating dx
            # Loop over segments, starting with the one closest to the endpoint.
            for f_idx, f in enumerate(track_fragments[init_perm]):
                distances = cdist(input_data[np.array(f).astype(int), 1:4], [endpoint])
                order.append(total_d + (distances.max() + distances.min())/2)
                total_d += (distances.max() + distances.min())/2
                # At each step we change the "endpoint" to the other end of the segment.
                endpoint = input_data[np.array(f).astype(int), 1:4][np.argmax(distances.reshape((-1,)))]

            # Now we want to re-order according to the new distances we just computed.
            perm = np.argsort(np.array(order)[np.argsort(init_perm)])
            # print(b, idx, perm)
            current_distance = 0.
            for f_idx, f in enumerate(track_fragments[perm]):
                # Also recompute more accurate dx using a local PCA
                if recompute_dx:
                    new_coords = pca.fit_transform(input_data[f, 1:4])
                    dx = new_coords[:, 0].max() - new_coords[:, 0].min()
                else:
                    dx = metrics["dx"][batch_mask][final_cluster_ids[batch_mask] == idx].values[perm][f_idx]

                current_distance += dx

                where = np.where(batch_mask)[0][final_cluster_ids[batch_mask] == idx][perm][f_idx]
                residual_range[where] = current_distance
                if residual_range[where] > 1330:
                    print(b, idx, f_idx, residual_range[where], metrics["dx"][batch_mask][final_cluster_ids[batch_mask] == idx].sum())
                    #print(metrics["dx"][batch_mask][final_cluster_ids[batch_mask] == idx])
                new_dx[where] = dx


    metrics["residual_range"] = residual_range
    metrics["dx"] = new_dx
    metrics = metrics[metrics["residual_range"] > -1]
    return metrics


[docs]@post_processing(['muon-residual-range',
                'muon-residual-range-selected',
                'muon-residual-range-endpoints'],
                ['input_data', 'clust_data', 'points_label'],
                ['particles', 'particles_seg', 'node_pred_type'])
def muon_residual_range(cfg, module_cfg, data_blob, res, logdir, iteration,
                        input_data=None, ghost_mask=None, particles=None, particles_seg=None, node_pred_type=None,
                        points_label=None, clust_data=None, data_idx=None, clust_data_noghost=None,
                        **kwargs):
    """
    Compute residual range vs dQ/dx for track-like particles (muons, protons).
    """
    use_true_points = module_cfg.get('use_true_points', False)
    fiducial        = module_cfg.get('fiducial', 33)
    step            = module_cfg.get('step', 16) # about 5cm
    spatial_size    = module_cfg.get('spatial_size', 768)
    min_length      = module_cfg.get('min_length', 200)
    neighborhood    = module_cfg.get('neighborhood', 21)
    association_threshold = module_cfg.get('association_threshold', 5)
    recompute_dx    = module_cfg.get('recompute_dx', False)

    track_label = 1
    track_mask = particles_seg[data_idx] == track_label
    muon_mask = np.argmax(node_pred_type[data_idx], axis=1) == 2
    muon_particles = particles[data_idx][track_mask & muon_mask]
    if 'ghost' in res:
        input_data_deghosted = input_data[data_idx][ghost_mask[data_idx]]
    else:
        input_data_deghosted = input_data[data_idx]

    # Bin tracks into smaller fragments
    true_endpoints = points_label[data_idx][(points_label[data_idx][:, -1] == 1) & (points_label[data_idx][:, 4] == 1)]
    print('true endpoints', true_endpoints[:, 1:4])
    if use_true_points:
        track_endpoints = true_endpoints
    else:
        ppn = uresnet_ppn_type_point_selector(input_data[data_idx], res, entry=data_idx)
        #ppn_voxels = ppn[:, 1:4]
        ppn_endpoints = np.argmax(ppn[:, 13:])
        ppn_track_score = ppn[:, 8]
        #track_endpoints = ppn[(ppn_endpoints == 1) & (ppn_track_score > 0.5)]
        #track_endpoints = ppn[(ppn_track_score > 0.5)]
        track_endpoints = ppn

    # Select endpoints
    from scipy.spatial.distance import cdist
    selected_endpoints = []
    for f in muon_particles:
        distances = cdist(track_endpoints[:, 1:4], input_data_deghosted[f, 1:4])
        selection = distances.min(axis=1) < association_threshold
        selected_endpoints.append(track_endpoints[selection])

    local_fragments, residual_range, endpoints, cluster_id = \
        get_fragments(input_data_deghosted, muon_particles,
                    step=step, track_endpoints=selected_endpoints,
                    fiducial=fiducial, spatial_size=spatial_size,
                    min_length=min_length, neighborhood=neighborhood)
    # print('true points', points_label[data_idx][(points_label[data_idx][:, -1] == 1) & (points_label[data_idx][:, 4] == 1)][:, :4])
    # print('predicted endpoints', track_endpoints[:, :4])
    # print('endpoints', endpoints)
    # print('residual_range', residual_range)
    # for f in local_fragments:
    #     print(input_data[data_idx][f][:, 1:4].mean(axis=0))
    if len(local_fragments) == 0:
        return [((), ()), ((), ()), ((), ())]

    selected_endpoints = np.vstack(selected_endpoints)

    # Store selected endpoints information
    row_names_selected, row_values_selected = [], []
    true_d = cdist(selected_endpoints[:, 1:4], true_endpoints[:, 1:4])
    row_names_selected.extend([("distance_to_closest_true_endpoint",)] * len(selected_endpoints))
    row_values_selected.extend([(x,) for x in true_d.min(axis=1)])

    row_names_endpoints, row_values_endpoints = [], []
    selected_endpoints = np.unique(endpoints, axis=0)
    true_d = cdist(selected_endpoints, true_endpoints[:, 1:4])
    row_names_endpoints.extend([("distance_to_closest_true_endpoint",)] * len(selected_endpoints))
    row_values_endpoints.extend([(x,) for x in true_d.min(axis=1)])

    # Get true PID label to select true muons
    #true_cluster_id =  get_cluster_label(clust_label, local_fragments, column=6)
    #true_pdg = np.array([particles[idx].pdg_code() for idx in true_cluster_id])
    #muons = (true_pdg == 13) | (true_pdg == -13)

    # For each smaller fragment compute metrics
    result, final_fragments, final_endpoints, final_cluster_id = compute_metrics(np.array(local_fragments, dtype=object), cluster_id, np.array(residual_range), input_data_deghosted,  endpoints)

    if len(final_fragments) == 0:
        return [((), ()), ((), ()), ((), ())]

    # Refine residual range computation
    result = compute_residual_range(input_data_deghosted, final_cluster_id,
                                result, final_fragments,
                                final_endpoints, fiducial=fiducial,
                                spatial_size=spatial_size, recompute_dx=recompute_dx)

    row_names, row_values = [], []
    for idx in range(len(result['dQdx'])):
        tuple_names = ("residual_range", "rough_residual_range", "dQdx", "dN", "dx", "dQ")
        row_names.append(tuple_names)
        row_values.append(tuple([result[key][idx] for key in tuple_names]))

    return [(row_names, row_values), (row_names_selected, row_values_selected), (row_names_endpoints, row_values_endpoints)]
lartpc_mlreco3d documentation

Source code for mlreco.post_processing.analysis.muon_residual_range