Source code for abcpy.NN_utilities.networks

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import grad


class SiameseNet(nn.Module):
    """This is used in the contrastive distance learning. It is a network wrapping a standard neural network and
    feeding two samples through it at once.

    From https://github.com/adambielski/siamese-triplet"""

    def __init__(self, embedding_net):
        super(SiameseNet, self).__init__()
        self.embedding_net = embedding_net

    def forward(self, x1, x2):
        output1 = self.embedding_net(x1)
        output2 = self.embedding_net(x2)
        return output1, output2

    def get_embedding(self, x):
        return self.embedding_net(x)

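A minimal usage sketch (the wrapped embedding network, the sizes and the data below are arbitrary choices for illustration; ``createDefaultNN`` is defined further down in this module):

# Illustrative sketch only: sizes and data are arbitrary.
embedding_net = createDefaultNN(input_size=10, output_size=2)()
siamese = SiameseNet(embedding_net)
x1, x2 = torch.randn(16, 10), torch.randn(16, 10)
emb1, emb2 = siamese(x1, x2)  # two (16, 2) embedding tensors, one per input batch
emb = siamese.get_embedding(x1)  # embedding of a single batch
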
class TripletNet(nn.Module):
    """This is used in the triplet distance learning. It is a network wrapping a standard neural network and
    feeding three samples through it at once.

    From https://github.com/adambielski/siamese-triplet"""

    def __init__(self, embedding_net):
        super(TripletNet, self).__init__()
        self.embedding_net = embedding_net

    def forward(self, x1, x2, x3):
        output1 = self.embedding_net(x1)
        output2 = self.embedding_net(x2)
        output3 = self.embedding_net(x3)
        return output1, output2, output3

    def get_embedding(self, x):
        return self.embedding_net(x)

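The triplet version works the same way but takes an anchor, a positive and a negative batch (again, purely illustrative sizes and data):

# Illustrative sketch only: anchor/positive/negative batches with arbitrary sizes.
triplet = TripletNet(createDefaultNN(input_size=10, output_size=2)())
anchor, positive, negative = (torch.randn(16, 10) for _ in range(3))
emb_a, emb_p, emb_n = triplet(anchor, positive, negative)
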
class ScalerAndNet(nn.Module):
    """Defines a nn.Module class that wraps a scaler and a neural network, and applies the scaler before passing
    the data through the neural network."""

    def __init__(self, net, scaler):
        super().__init__()
        self.net = net
        self.scaler = scaler

    def forward(self, x):
        x = torch.tensor(self.scaler.transform(x), dtype=torch.float32).to(next(self.net.parameters()).device)
        return self.net(x)

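A sketch of how the wrapper might be used, assuming a fitted scikit-learn scaler such as ``MinMaxScaler`` (the scaler choice, sizes and data here are illustrative assumptions, not prescribed by this module; any object exposing a ``transform`` method should work):

# Illustrative sketch only: any fitted object with a transform() method can act as the scaler.
import numpy as np
from sklearn.preprocessing import MinMaxScaler

reference_data = np.random.randn(100, 5)
scaler = MinMaxScaler().fit(reference_data)
net = createDefaultNN(input_size=5, output_size=2)()
scaled_net = ScalerAndNet(net, scaler)
out = scaled_net(np.random.randn(8, 5))  # the data is rescaled before the forward pass
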
class DiscardLastOutputNet(nn.Module):
    """Defines a nn.Module class that wraps a neural network and discards the last component of the network
    output."""

    def __init__(self, net):
        super().__init__()
        self.net = net

    def forward(self, x):
        x = self.net(x)
        if len(x.shape) == 1:
            return x[0:-1]
        if len(x.shape) == 2:
            return x[:, 0:-1]
        if len(x.shape) == 3:
            return x[:, :, 0:-1]

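For instance (hypothetical sizes), wrapping a network with 4 outputs yields a network with 3:

# Illustrative sketch only: the last output component is dropped.
base_net = createDefaultNN(input_size=5, output_size=4)()
trimmed_net = DiscardLastOutputNet(base_net)
out = trimmed_net(torch.randn(8, 5))  # shape (8, 3) instead of (8, 4)
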
def createDefaultNN(input_size, output_size, hidden_sizes=None, nonlinearity=None, batch_norm_last_layer=False,
                    batch_norm_last_layer_momentum=0.1):
    """Function returning a fully connected neural network class with a given input and output size, and optionally
    given hidden layer sizes (if these are not given, they are determined from the input and output size in a
    heuristic way, see below).

    In order to instantiate the network, you need to write:

    >>> createDefaultNN(input_size, output_size)()

    as the function returns a class, and () is needed to instantiate an object.

    If hidden_sizes is None, three hidden layers are used with the following sizes:
    ``[int(input_size * 1.5), int(input_size * 0.75 + output_size * 3), int(output_size * 5)]``

    Note that the nonlinearity here is passed as an object or a functional, not a class, eg:
        nonlinearity = nn.Softplus()
    or:
        nonlinearity = nn.functional.softplus
    """

    class DefaultNN(nn.Module):
        """Neural network class with sizes determined by the upper level variables."""

        def __init__(self):
            super(DefaultNN, self).__init__()
            # put some fully connected layers:

            if hidden_sizes is not None and len(hidden_sizes) == 0:
                # it is effectively a linear network
                self.fc_in = nn.Linear(input_size, output_size)

            else:
                if hidden_sizes is None:
                    # then set some default values for the hidden layers sizes; is this parametrization reasonable?
                    hidden_sizes_list = [int(input_size * 1.5), int(input_size * 0.75 + output_size * 3),
                                         int(output_size * 5)]
                else:
                    hidden_sizes_list = hidden_sizes

                self.fc_in = nn.Linear(input_size, hidden_sizes_list[0])

                # define now the hidden layers
                self.fc_hidden = nn.ModuleList()
                for i in range(len(hidden_sizes_list) - 1):
                    self.fc_hidden.append(nn.Linear(hidden_sizes_list[i], hidden_sizes_list[i + 1]))
                self.fc_out = nn.Linear(hidden_sizes_list[-1], output_size)

            # define the batch_norm:
            if batch_norm_last_layer:
                self.bn_out = nn.BatchNorm1d(output_size, affine=False, momentum=batch_norm_last_layer_momentum)

        def forward(self, x):
            if nonlinearity is None:
                nonlinearity_fcn = F.relu
            else:
                nonlinearity_fcn = nonlinearity

            if not hasattr(self, "fc_hidden"):
                # it means that hidden_sizes was provided and the length of the list was 0, ie the net is linear
                return self.fc_in(x)

            x = nonlinearity_fcn(self.fc_in(x))
            for i in range(len(self.fc_hidden)):
                x = nonlinearity_fcn(self.fc_hidden[i](x))
            x = self.fc_out(x)
            if batch_norm_last_layer:
                x = self.bn_out(x)

            return x

    return DefaultNN

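A short instantiation sketch (sizes, hidden layers and nonlinearity are arbitrary illustrative choices):

# Illustrative sketch only: createDefaultNN returns a class; the trailing () instantiates it.
net = createDefaultNN(input_size=10, output_size=3, hidden_sizes=[20, 10], nonlinearity=nn.Softplus())()
out = net(torch.randn(32, 10))  # shape (32, 3)
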
def createDefaultNNWithDerivatives(input_size, output_size, hidden_sizes=None, nonlinearity=None,
                                   first_derivative_only=False):
    """Function returning a fully connected neural network class with a given input and output size, and optionally
    given hidden layer sizes (if these are not given, they are determined from the input and output size with some
    expression).

    This neural network is capable of computing the first and second derivatives of the output with respect to the
    input along with the forward pass.

    All layers in this neural network are linear.

    In order to instantiate the network, you need to write:

    >>> createDefaultNNWithDerivatives(input_size, output_size)()

    as the function returns a class, and () is needed to instantiate an object.

    If hidden_sizes is None, three hidden layers are used with the following sizes:
    ``[int(input_size * 1.5), int(input_size * 0.75 + output_size * 3), int(output_size * 5)]``

    Note that the nonlinearity here is passed as a class, not an object, eg:
        nonlinearity = nn.Softplus
    """

    if nonlinearity in [torch.nn.Softsign, torch.nn.Tanhshrink]:
        raise RuntimeError("The implementation of forward derivatives does not work with Tanhshrink and "
                           "Softsign nonlinearities.")

    class DefaultNNWithDerivatives(nn.Module):
        """Neural network class with sizes determined by the upper level variables."""

        def __init__(self):
            super(DefaultNNWithDerivatives, self).__init__()
            # put some fully connected layers:

            if nonlinearity is None:  # default nonlinearity
                non_linearity = nn.ReLU
            else:
                non_linearity = nonlinearity  # need to change name, otherwise it shadows the outer variable

            if hidden_sizes is not None and len(hidden_sizes) == 0:
                # it is effectively a linear network
                self.fc_in = nn.Linear(input_size, output_size)

            else:
                if hidden_sizes is None:
                    # then set some default values for the hidden layers sizes; is this parametrization reasonable?
                    hidden_sizes_list = [int(input_size * 1.5), int(input_size * 0.75 + output_size * 3),
                                         int(output_size * 5)]
                else:
                    hidden_sizes_list = hidden_sizes

                self.fc_in = nn.Linear(input_size, hidden_sizes_list[0])
                self.nonlinearity_in = non_linearity()

                # define now the hidden layers
                self.fc_hidden = nn.ModuleList()
                self.nonlinearities_hidden = nn.ModuleList()
                for i in range(len(hidden_sizes_list) - 1):
                    self.fc_hidden.append(nn.Linear(hidden_sizes_list[i], hidden_sizes_list[i + 1]))
                    self.nonlinearities_hidden.append(non_linearity())
                self.fc_out = nn.Linear(hidden_sizes_list[-1], output_size)

        def forward(self, x):
            if not hasattr(self, "fc_hidden"):
                # it means that hidden_sizes was provided and the length of the list was 0, ie the net is a single layer
                return self.fc_in(x)

            x = self.fc_in(x)
            x1 = self.nonlinearity_in(x)
            for i in range(len(self.fc_hidden)):
                x = self.fc_hidden[i](x1)
                x1 = self.nonlinearities_hidden[i](x)
            x = self.fc_out(x1)

            return x

        def forward_and_derivatives(self, x):

            # initialize the derivatives (one for each element of the batch):
            f = self.fc_in.weight.unsqueeze(0).repeat(x.shape[0], 1, 1).transpose(2, 1).transpose(0, 1)
            if not first_derivative_only:
                s = torch.zeros_like(f)

            if not hasattr(self, "fc_hidden"):
                # it means that hidden_sizes was provided and the length of the list was 0, ie the net is a single layer
                if first_derivative_only:
                    return self.fc_in(x), f.transpose(0, 1)
                else:
                    return self.fc_in(x), f.transpose(0, 1), s.transpose(0, 1)

            x = self.fc_in(x)
            x1 = self.nonlinearity_in(x)

            for i in range(len(self.fc_hidden)):
                z = x1.grad_fn(torch.ones_like(x1))  # here we repeat some computation from the above line
                # z = grad(x1, x, torch.ones_like(x1), create_graph=True)[0]  # alternative way of computing z
                # update the second derivative first, as it uses the first derivative of the previous layer
                if not first_derivative_only:
                    s = z * s + grad(z, x, torch.ones_like(z), retain_graph=True)[0] * f ** 2
                f = z * f

                f = F.linear(f, self.fc_hidden[i].weight)
                if not first_derivative_only:
                    s = F.linear(s, self.fc_hidden[i].weight)
                x = self.fc_hidden[i](x1)
                x1 = self.nonlinearities_hidden[i](x)

            z = x1.grad_fn(torch.ones_like(x1))  # here we repeat some computation from the above line
            # z = grad(x1, x, torch.ones_like(x1), create_graph=True)[0]  # alternative way of computing z
            # update the second derivative first, as it uses the first derivative of the previous layer
            if not first_derivative_only:
                s = z * s + grad(z, x, torch.ones_like(z), retain_graph=True)[0] * f ** 2
            f = z * f

            f = F.linear(f, self.fc_out.weight)
            if not first_derivative_only:
                s = F.linear(s, self.fc_out.weight)
            x = self.fc_out(x1)

            if first_derivative_only:
                return x, f.transpose(0, 1)
            else:
                return x, f.transpose(0, 1), s.transpose(0, 1)

        def forward_and_full_derivatives(self, x):
            """This computes the Jacobian and the full Hessian matrix."""
            # initialize the derivatives (one for each element of the batch)
            f = self.fc_in.weight.unsqueeze(0).repeat(x.shape[0], 1, 1).transpose(2, 1).transpose(0, 1)
            H = torch.zeros((f.shape[0], *f.shape)).to(f)  # the Hessian has an additional dimension wrt f

            if not hasattr(self, "fc_hidden"):
                # it means that hidden_sizes was provided and the length of the list was 0, ie the net is a single layer
                return self.fc_in(x), f.transpose(0, 1), H.transpose(0, 2)

            x = self.fc_in(x)
            x1 = self.nonlinearity_in(x)

            for i in range(len(self.fc_hidden)):
                z = x1.grad_fn(torch.ones_like(x1))  # here we repeat some computation from the above line
                # z = grad(x1, x, torch.ones_like(x1), create_graph=True)[0]  # alternative way of computing z
                # update the second derivative first, as it uses the first derivative of the previous layer
                H = z * H + grad(z, x, torch.ones_like(z), retain_graph=True)[0] * torch.einsum('ibo,jbo->ijbo', f, f)
                f = z * f

                f = F.linear(f, self.fc_hidden[i].weight)
                H = F.linear(H, self.fc_hidden[i].weight)
                x = self.fc_hidden[i](x1)
                x1 = self.nonlinearities_hidden[i](x)

            z = x1.grad_fn(torch.ones_like(x1))  # here we repeat some computation from the above line
            # z = grad(x1, x, torch.ones_like(x1), create_graph=True)[0]  # alternative way of computing z
            # update the second derivative first, as it uses the first derivative of the previous layer
            H = z * H + grad(z, x, torch.ones_like(z), retain_graph=True)[0] * torch.einsum('ibo,jbo->ijbo', f, f)
            f = z * f

            f = F.linear(f, self.fc_out.weight)
            H = F.linear(H, self.fc_out.weight)
            x = self.fc_out(x1)

            return x, f.transpose(0, 1), H.transpose(0, 2)

    return DefaultNNWithDerivatives

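A sketch of how the derivative-aware network might be used, assuming a smooth nonlinearity such as ``nn.Softplus`` (sizes and data are arbitrary illustrative choices; the returned tensors are the network output together with the derivative terms built above):

# Illustrative sketch only: forward pass plus derivatives with respect to the input.
net = createDefaultNNWithDerivatives(input_size=5, output_size=2, nonlinearity=nn.Softplus)()
x = torch.randn(8, 5)
y, first_der, second_der = net.forward_and_derivatives(x)   # output, first and second derivatives
y, jacobian, hessian = net.forward_and_full_derivatives(x)  # output, Jacobian and full Hessian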