Source code for mlreco.models.layers.common.mobilenet

import numpy as np
import torch
import torch.nn as nn

import MinkowskiEngine as ME
import MinkowskiFunctional as MF

from mlreco.models.layers.common.blocks import MBResConv, MBConv, SEBlock, MBResConvSE
from mlreco.models.layers.common.activation_normalization_factories import activations_dict, activations_construct
from mlreco.models.layers.common.configuration import setup_cnn_configuration

[docs]class MobileNetV3(torch.nn.Module): ''' Vanilla UResNet with access to intermediate feature planes. Configurations -------------- depth : int Depth of UResNet, also corresponds to how many times we down/upsample. num_filters : int Number of filters in the first convolution of UResNet. Will increase linearly with depth. reps : int, optional Convolution block repetition factor kernel_size : int, optional Kernel size for the SC (sparse convolutions for down/upsample). input_kernel : int, optional Receptive field size for very first convolution after input layer. '''
[docs] def __init__(self, cfg, name='mobilenetv3'): super(MobileNetV3, self).__init__() setup_cnn_configuration(self, cfg, name) model_cfg = cfg[name] # UResNet Configurations self.reps = model_cfg.get('reps', 2) self.depth = model_cfg.get('depth', 5) self.num_filters = model_cfg.get('num_filters', 16) self.nPlanes = [i * self.num_filters for i in range(1, self.depth+1)] # self.kernel_size = cfg.get('kernel_size', 3) # self.downsample = cfg.get(downsample, 2) self.input_kernel = model_cfg.get('input_kernel', 3) # Initialize Input Layer self.input_layer = ME.MinkowskiConvolution( in_channels=self.num_input, out_channels=self.num_filters, kernel_size=self.input_kernel, stride=1, dimension=self.D) # Initialize Encoder self.encoding_conv = [] self.encoding_block = [] for i, F in enumerate(self.nPlanes): m = [] for _ in range(self.reps): m.append(MBResConv(F, F, dimension=self.D, activation=self.activation_name, activation_args=self.activation_args)) m = nn.Sequential(*m) self.encoding_block.append(m) m = [] if i < self.depth-1: m.append(ME.MinkowskiBatchNorm(F)) m.append(activations_construct( self.activation_name, **self.activation_args)) m.append(ME.MinkowskiConvolution( in_channels=self.nPlanes[i], out_channels=self.nPlanes[i+1], kernel_size=2, stride=2, dimension=self.D)) m = nn.Sequential(*m) self.encoding_conv.append(m) self.encoding_conv = nn.Sequential(*self.encoding_conv) self.encoding_block = nn.Sequential(*self.encoding_block) # Initialize Decoder self.decoding_block = [] self.decoding_conv = [] for i in range(self.depth-2, -1, -1): m = [] m.append(ME.MinkowskiBatchNorm(self.nPlanes[i+1])) m.append(activations_construct( self.activation_name, **self.activation_args)) m.append(ME.MinkowskiConvolutionTranspose( in_channels=self.nPlanes[i+1], out_channels=self.nPlanes[i], kernel_size=2, stride=2, dimension=self.D)) m = nn.Sequential(*m) self.decoding_conv.append(m) m = [] for j in range(self.reps): m.append(MBResConv(self.nPlanes[i] * (2 if j == 0 else 1), self.nPlanes[i], dimension=self.D, activation=self.activation_name, activation_args=self.activation_args)) m = nn.Sequential(*m) self.decoding_block.append(m) self.decoding_block = nn.Sequential(*self.decoding_block) self.decoding_conv = nn.Sequential(*self.decoding_conv)
[docs] def encoder(self, x): ''' Vanilla UResNet Encoder. INPUTS: - x (SparseTensor): MinkowskiEngine SparseTensor RETURNS: - result (dict): dictionary of encoder output with intermediate feature planes: 1) encoderTensors (list): list of intermediate SparseTensors 2) finalTensor (SparseTensor): feature tensor at deepest layer. ''' x = self.input_layer(x) encoderTensors = [x] for i, layer in enumerate(self.encoding_block): x = self.encoding_block[i](x) encoderTensors.append(x) x = self.encoding_conv[i](x) result = { "encoderTensors": encoderTensors, "finalTensor": x } return result
[docs] def decoder(self, final, encoderTensors): ''' Vanilla UResNet Decoder INPUTS: - encoderTensors (list of SparseTensor): output of encoder. RETURNS: - decoderTensors (list of SparseTensor): list of feature tensors in decoding path at each spatial resolution. ''' decoderTensors = [] x = final for i, layer in enumerate(self.decoding_conv): eTensor = encoderTensors[-i-2] x = layer(x) # print(x, eTensor) x = ME.cat(eTensor, x) x = self.decoding_block[i](x) decoderTensors.append(x) return decoderTensors
[docs] def forward(self, input): coordinates = input[:, 0:self.D+1].int() features = input[:, self.D+1:].float() x = ME.SparseTensor(features, coordinates=coordinates) encoderOutput = self.encoder(x) encoderTensors = encoderOutput['encoderTensors'] finalTensor = encoderOutput['finalTensor'] decoderTensors = self.decoder(finalTensor, encoderTensors) res = { 'encoderTensors': encoderTensors, 'decoderTensors': decoderTensors, 'finalTensor': finalTensor } return res
[docs]class MB3Encoder(torch.nn.Module): ''' Vanilla UResNet with access to intermediate feature planes. Configurations -------------- depth : int Depth of UResNet, also corresponds to how many times we down/upsample. num_filters : int Number of filters in the first convolution of UResNet. Will increase linearly with depth. reps : int, optional Convolution block repetition factor kernel_size : int, optional Kernel size for the SC (sparse convolutions for down/upsample). input_kernel : int, optional Receptive field size for very first convolution after input layer. '''
[docs] def __init__(self, cfg, name='mobilenetv3_encoder'): super(MB3Encoder, self).__init__() setup_cnn_configuration(self, cfg, name) model_cfg = cfg['modules'][name] # UResNet Configurations self.reps = model_cfg.get('reps', 2) self.depth = model_cfg.get('depth', 5) self.num_filters = model_cfg.get('num_filters', 16) self.nPlanes = [i * self.num_filters for i in range(1, self.depth+1)] # self.kernel_size = cfg.get('kernel_size', 3) # self.downsample = cfg.get(downsample, 2) self.input_kernel = model_cfg.get('input_kernel', 3) # Initialize Input Layer self.input_layer = ME.MinkowskiConvolution( in_channels=self.num_input, out_channels=self.num_filters, kernel_size=self.input_kernel, stride=1, dimension=self.D) # Initialize Encoder self.encoding_conv = [] self.encoding_block = [] for i, F in enumerate(self.nPlanes): m = [] for _ in range(self.reps): m.append(MBResConvSE(F, F, dimension=self.D, activation=self.activation_name, activation_args=self.activation_args)) m = nn.Sequential(*m) self.encoding_block.append(m) m = [] if i < self.depth-1: m.append(ME.MinkowskiBatchNorm(F)) m.append(activations_construct( self.activation_name, **self.activation_args)) m.append(ME.MinkowskiConvolution( in_channels=self.nPlanes[i], out_channels=self.nPlanes[i+1], kernel_size=2, stride=2, dimension=self.D)) m = nn.Sequential(*m) self.encoding_conv.append(m) self.encoding_conv = nn.Sequential(*self.encoding_conv) self.encoding_block = nn.Sequential(*self.encoding_block)
[docs] def encoder(self, x): ''' Vanilla UResNet Encoder. INPUTS: - x (SparseTensor): MinkowskiEngine SparseTensor RETURNS: - result (dict): dictionary of encoder output with intermediate feature planes: 1) encoderTensors (list): list of intermediate SparseTensors 2) finalTensor (SparseTensor): feature tensor at deepest layer. ''' x = self.input_layer(x) encoderTensors = [x] for i, layer in enumerate(self.encoding_block): x = self.encoding_block[i](x) encoderTensors.append(x) x = self.encoding_conv[i](x) result = { "encoderTensors": encoderTensors, "finalTensor": x } return result
[docs] def forward(self, input): coordinates = input[:, 0:self.D+1].int() features = input[:, self.D+1:].float() x = ME.SparseTensor(features, coordinates=coordinates) encoderOutput = self.encoder(x) encoderTensors = encoderOutput['encoderTensors'] finalTensor = encoderOutput['finalTensor'] decoderTensors = self.decoder(finalTensor, encoderTensors) res = { 'encoderTensors': encoderTensors, 'decoderTensors': decoderTensors, 'finalTensor': finalTensor } return res