#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
The :mod:`mixturelib.hyper_models` contains classes:

- :class:`mixturelib.hyper_models.HyperModel`
- :class:`mixturelib.hyper_models.HyperModelDirichlet`
- :class:`mixturelib.hyper_models.HyperExpertNN`
- :class:`mixturelib.hyper_models.HyperModelGateSparsed`
"""
from __future__ import print_function

__docformat__ = 'restructuredtext'

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import TensorDataset, DataLoader, Dataset
class HyperModel:
    r"""Base class for all hyper models."""

    def __init__(self):
        """Constructor method.
        """
        pass

    def E_step(self, X, Y, Z, HyperParameters):
        r"""Performs the E-step of the EM-algorithm: finds the variational
        probability `q` of the model parameters.

        :param X: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape `num_elements` :math:`\times`
            `num_answers`.
        :type Y: FloatTensor
        :param Z: The tensor of shape `num_elements` :math:`\times`
            `num_models`.
        :type Z: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters,
            where each `key` is a string and each `value` is a FloatTensor.
        :type HyperParameters: dict
        """
        raise NotImplementedError

    def M_step(self, X, Y, Z, HyperParameters):
        r"""Performs the M-step of the EM-algorithm: finds the model hyper
        parameters.

        :param X: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape `num_elements` :math:`\times`
            `num_answers`.
        :type Y: FloatTensor
        :param Z: The tensor of shape `num_elements` :math:`\times`
            `num_models`.
        :type Z: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters,
            where each `key` is a string and each `value` is a FloatTensor.
        :type HyperParameters: dict
        """
        raise NotImplementedError

    def LogPiExpectation(self, X, Y, HyperParameters):
        r"""Returns the expected value of the logarithm of each model's
        probability.

        :param X: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape `num_elements` :math:`\times`
            `num_answers`.
        :type Y: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters,
            where each `key` is a string and each `value` is a FloatTensor.
        :type HyperParameters: dict
        """
        raise NotImplementedError

    def PredictPi(self, X, HyperParameters):
        r"""Returns the probability (weight) of each model.

        :param X: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type X: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters,
            where each `key` is a string and each `value` is a FloatTensor.
        :type HyperParameters: dict
        """
        raise NotImplementedError
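

# A minimal sketch (illustrative, not part of the library): the smallest
# subclass that satisfies the HyperModel interface above -- uniform,
# data-independent weights over `output_dim` local models. The name
# `UniformHyperModel` is hypothetical and is introduced here only to make
# the contract of the four methods concrete.
class UniformHyperModel(HyperModel):
    def __init__(self, output_dim=2):
        super(UniformHyperModel, self).__init__()
        self.output_dim = output_dim

    def E_step(self, X, Y, Z, HyperParameters):
        pass  # no variational parameters to update

    def M_step(self, X, Y, Z, HyperParameters):
        pass  # no hyper parameters to fit

    def LogPiExpectation(self, X, Y, HyperParameters):
        # log(1 / num_models) for every object and every local model
        pi = torch.ones([X.shape[0], self.output_dim]) / self.output_dim
        return pi.log()

    def PredictPi(self, X, HyperParameters):
        return torch.ones([X.shape[0], self.output_dim]) / self.output_dim
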

class HyperModelDirichlet(HyperModel):
    r"""A hyper model for a mixture of models. This hyper model cannot
    predict a local model for each object individually, because the model
    probability does not depend on the object.

    In this hyper model, the probability vector of the local models is
    drawn from a Dirichlet distribution with parameter :math:`\mu`.

    :param output_dim: The number of local models.
    :type output_dim: int
    :param device: The device for pytorch. Can be 'cpu' or 'cuda'.
        Default 'cpu'.

    Example:

    >>> _ = torch.random.manual_seed(42)  # Set random seed for repeatability
    >>>
    >>> w = torch.randn(2, 1)  # Generate real parameter vector
    >>> X = torch.randn(5, 2)  # Generate features data
    >>> Z = torch.distributions.dirichlet.Dirichlet(
    ...     torch.tensor([0.5, 0.5])).sample(
    ...         (5,))  # Set correspondence between data and local models
    >>> Y = X@w + 0.1*torch.randn(5, 1)  # Generate target data with noise 0.1
    >>>
    >>> hyper_model = HyperModelDirichlet(
    ...     output_dim=2)  # Init hyper model with Dirichlet weighting
    >>> hyper_parameters = {}  # Without hyper parameters
    >>>
    >>> hyper_model.LogPiExpectation(
    ...     X, Y, hyper_parameters)  # Log of probability before E-step
    tensor([[-1.0000, -1.0000],
            [-1.0000, -1.0000],
            [-1.0000, -1.0000],
            [-1.0000, -1.0000],
            [-1.0000, -1.0000]])
    >>>
    >>> hyper_model.E_step(X, Y, Z, hyper_parameters)
    >>> hyper_model.LogPiExpectation(
    ...     X, Y, hyper_parameters)  # Log of probability after E-step
    tensor([[-0.7118, -0.8310],
            [-0.7118, -0.8310],
            [-0.7118, -0.8310],
            [-0.7118, -0.8310],
            [-0.7118, -0.8310]])
    """

    def __init__(self, output_dim=2, device='cpu'):
        """Constructor method.
        """
        super(HyperModelDirichlet, self).__init__()
        self.output_dim = output_dim
        self.device = device

        self.mu = torch.ones(self.output_dim)
        self.m = torch.zeros_like(self.mu)
        self.m.data = self.mu.data.clone()
        self.N = 0

    def E_step(self, X, Y, Z, HyperParameters):
        r"""Performs the E-step of the EM-algorithm: finds the variational
        probability `q` of the model parameters.

        Calculates the analytical solution for `q` in the class of
        Dirichlet distributions, :math:`q = Dir(m)`, where
        :math:`m = \mu + \gamma` with
        :math:`\gamma_k = \sum_{i=1}^{num\_elements} Z_{ik}`,
        and :math:`\mu` is the prior.

        .. warning::
            Currently :math:`\mu_k` is `1` for all `k` and cannot be
            changed.

        :param X: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape `num_elements` :math:`\times`
            `num_answers`.
        :type Y: FloatTensor
        :param Z: The tensor of shape `num_elements` :math:`\times`
            `num_models`.
        :type Z: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters,
            where each `key` is a string and each `value` is a FloatTensor.
        :type HyperParameters: dict
        """
        gamma = Z.sum(dim=0)
        self.m = (self.mu + gamma).detach()
        self.N = Z.shape[0]
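
    # Worked example (illustrative): with the prior mu = (1, 1) and
    # responsibilities Z = [[0.2, 0.8], [0.7, 0.3]], the column sums give
    # gamma = (0.9, 1.1), so the posterior parameter is m = (1.9, 2.1)
    # and N = 2.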

    def M_step(self, X, Y, Z, HyperParameters):
        r"""The method does nothing.

        :param X: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape `num_elements` :math:`\times`
            `num_answers`.
        :type Y: FloatTensor
        :param Z: The tensor of shape `num_elements` :math:`\times`
            `num_models`.
        :type Z: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters,
            where each `key` is a string and each `value` is a FloatTensor.
        :type HyperParameters: dict
        """
        pass

    def LogPiExpectation(self, X, Y, HyperParameters):
        r"""Returns the expected value of the logarithm of each model's
        probability.

        Returns the expectation of :math:`\log \pi`, where :math:`\pi` is
        a random vector from the Dirichlet distribution. The expectation is
        computed with the digamma function :math:`\psi`:
        :math:`\mathsf{E}\log \pi_k = \psi(m_k) - \psi(\sum_k m_k)`.

        :param X: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape `num_elements` :math:`\times`
            `num_answers`.
        :type Y: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters,
            where each `key` is a string and each `value` is a FloatTensor.
        :type HyperParameters: dict

        :return: The tensor of shape `num_elements` :math:`\times`
            `num_models`. The expected value of each model's
            log-probability.
        :rtype: FloatTensor
        """
        temp_1 = torch.ones([X.shape[0], self.output_dim])
        temp_2 = (torch.digamma(self.m)
                  - torch.digamma(self.output_dim * self.mu + self.N))
        return temp_1 * temp_2

    def PredictPi(self, X, HyperParameters):
        r"""Returns the probability (weight) of each model.

        Returns the same vector :math:`\pi` for all objects, where
        :math:`\pi = \frac{\mathbf{m}}{\sum_k \mathbf{m}_k}` and
        :math:`\mathbf{m}` is the parameter of the Dirichlet pdf.

        :param X: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type X: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters,
            where each `key` is a string and each `value` is a FloatTensor.
        :type HyperParameters: dict

        :return: The tensor of shape `num_elements` :math:`\times`
            `num_models`. The probability (weight) of each model.
        :rtype: FloatTensor
        """
        denum = self.m.sum()
        if denum != 0.:
            pi = torch.ones([X.shape[0], self.output_dim]) * (self.m / denum)
        else:
            pi = torch.zeros([X.shape[0], self.output_dim])
        return pi
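

# A minimal usage sketch (illustrative, not part of the library):
# numerically checking the conjugate update performed by
# HyperModelDirichlet.E_step. The helper name `_check_dirichlet_update`
# is hypothetical.
def _check_dirichlet_update():
    _ = torch.random.manual_seed(42)
    X = torch.randn(5, 2)
    Y = torch.randn(5, 1)
    Z = torch.distributions.dirichlet.Dirichlet(
        torch.tensor([0.5, 0.5])).sample((5,))

    hyper_model = HyperModelDirichlet(output_dim=2)
    hyper_model.E_step(X, Y, Z, {})

    # The posterior parameter is the prior plus the column sums of Z:
    # m = mu + gamma with gamma_k = sum_i Z_ik.
    assert torch.allclose(hyper_model.m, torch.ones(2) + Z.sum(dim=0))

    # E[log pi_k] = digamma(m_k) - digamma(sum_k m_k); every row of the
    # returned matrix is this same vector.
    log_pi = hyper_model.LogPiExpectation(X, Y, {})
    assert torch.allclose(
        log_pi[0],
        torch.digamma(hyper_model.m) - torch.digamma(hyper_model.m.sum()))
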

class HyperExpertNN(nn.Module, HyperModel):
    r"""A hyper model for a mixture of experts. The predicted local-model
    probabilities depend on the object.

    In this hyper model, the probability of each local model is predicted
    by a neural network with a softmax output. The network is a three-layer
    fully connected neural network.

    :param input_dim: The number of features.
    :type input_dim: int
    :param hidden_dim: The number of parameters in the hidden layer.
    :type hidden_dim: int
    :param output_dim: The number of local models.
    :type output_dim: int
    :param epochs: The number of epochs to train the neural network on
        each M-step.
    :type epochs: int
    :param device: The device for pytorch. Can be 'cpu' or 'cuda'.
        Default 'cpu'.

    Example:

    >>> _ = torch.random.manual_seed(42)  # Set random seed for repeatability
    >>>
    >>> w = torch.randn(2, 1)  # Generate real parameter vector
    >>> X = torch.randn(5, 2)  # Generate features data
    >>> Z = torch.distributions.dirichlet.Dirichlet(
    ...     torch.tensor([0.5, 0.5])).sample(
    ...         (5,))  # Set correspondence between data and local models
    >>> Y = X@w + 0.1*torch.randn(5, 1)  # Generate target data with noise 0.1
    >>>
    >>> hyper_model = HyperExpertNN(
    ...     input_dim=2,
    ...     output_dim=2)  # Init hyper model with gating network
    >>> hyper_parameters = {}  # Without hyper parameters
    >>>
    >>> hyper_model.LogPiExpectation(
    ...     X, Y, hyper_parameters)  # Log of probability before M-step
    tensor([[-0.4981, -0.9356],
            [-0.5176, -0.9063],
            [-0.4925, -0.9443],
            [-0.4957, -0.9395],
            [-0.4969, -0.9376]])
    >>>
    >>> hyper_model.M_step(X, Y, Z, hyper_parameters)
    >>> hyper_model.LogPiExpectation(
    ...     X, Y, hyper_parameters)  # Log of probability after M-step
    tensor([[-0.6294, -0.7612],
            [-0.9327, -0.5000],
            [-0.3273, -1.2760],
            [-0.5775, -0.8239],
            [-0.5357, -0.8801]])
    """

    def __init__(self, input_dim=20, hidden_dim=10, output_dim=10,
                 epochs=100, device='cpu'):
        """Constructor method.
        """
        super(HyperExpertNN, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.device = device
        self.epochs = epochs

        self.linear1 = nn.Linear(input_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, hidden_dim)
        self.linear3 = nn.Linear(hidden_dim, output_dim)

        self.optimizer = torch.optim.Adam(self.parameters())

        self.to(device)

    def forward(self, input):
        r"""Returns the model prediction for the given input data.

        :param input: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type input: FloatTensor
        :return: The tensor of shape `num_elements` :math:`\times`
            `num_models`. Unnormalised scores (logits) of all local models
            for the given input data; probabilities are obtained by
            applying softmax (see :meth:`PredictPi`).
        :rtype: FloatTensor
        """
        out = input
        out = self.linear1(out)
        out = F.relu(out)
        out = self.linear2(out)
        out = F.relu(out)
        out = self.linear3(out)
        return out

    def E_step(self, X, Y, Z, HyperParameters):
        r"""The method does nothing.

        :param X: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape `num_elements` :math:`\times`
            `num_answers`.
        :type Y: FloatTensor
        :param Z: The tensor of shape `num_elements` :math:`\times`
            `num_models`.
        :type Z: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters,
            where each `key` is a string and each `value` is a FloatTensor.
        :type HyperParameters: dict
        """
        pass

    def M_step(self, X, Y, Z, HyperParameters):
        r"""Performs the M-step of the EM-algorithm: finds the network
        parameters by gradient descent.

        The parameters are optimised with respect to the loss function
        :math:`loss = -\sum_{i=1}^{num\_elements}
        \sum_{k=1}^{num\_models} Z_{ik} \log \pi_k(x_i, V)`,
        where :math:`V` are the neural network parameters.

        :param X: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape `num_elements` :math:`\times`
            `num_answers`.
        :type Y: FloatTensor
        :param Z: The tensor of shape `num_elements` :math:`\times`
            `num_models`.
        :type Z: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters,
            where each `key` is a string and each `value` is a FloatTensor.
        :type HyperParameters: dict
        """
        dataset = TensorDataset(X.to(self.device), Z.to(self.device))

        for _ in range(self.epochs):
            train_generator = DataLoader(
                dataset=dataset, batch_size=128, shuffle=True)
            for it, (batch_of_x, batch_of_z) in enumerate(train_generator):
                self.zero_grad()
                loss = -(F.log_softmax(self.forward(batch_of_x), dim=-1)
                         * batch_of_z).mean()
                loss.backward()
                self.optimizer.step()

    def LogPiExpectation(self, X, Y, HyperParameters):
        r"""Returns the expected value of the logarithm of each model's
        probability.

        Takes the log softmax of the output of the forward method.

        :param X: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape `num_elements` :math:`\times`
            `num_answers`.
        :type Y: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters,
            where each `key` is a string and each `value` is a FloatTensor.
        :type HyperParameters: dict

        :return: The tensor of shape `num_elements` :math:`\times`
            `num_models`. The expected value of each model's
            log-probability.
        :rtype: FloatTensor
        """
        return F.log_softmax(self.forward(X), dim=-1)

    def PredictPi(self, X, HyperParameters):
        r"""Returns the probability (weight) of each model.

        Takes the softmax of the output of the forward method.

        :param X: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type X: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters,
            where each `key` is a string and each `value` is a FloatTensor.
        :type HyperParameters: dict

        :return: The tensor of shape `num_elements` :math:`\times`
            `num_models`. The probability (weight) of each model.
        :rtype: FloatTensor
        """
        return F.softmax(self.forward(X), dim=-1)
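

# A minimal usage sketch (illustrative, not part of the library): one
# M-step of HyperExpertNN fits the gating network so that its softmax
# output tracks the responsibilities Z. The helper name
# `_demo_gating_m_step` is hypothetical.
def _demo_gating_m_step():
    _ = torch.random.manual_seed(42)
    # Two well-separated clusters of objects.
    X = torch.cat([torch.randn(50, 2) - 2., torch.randn(50, 2) + 2.])
    Y = torch.randn(100, 1)
    # Hard assignments: the first cluster to model 0, the second to model 1.
    Z = torch.zeros(100, 2)
    Z[:50, 0] = 1.
    Z[50:, 1] = 1.

    hyper_model = HyperExpertNN(input_dim=2, hidden_dim=10, output_dim=2,
                                epochs=100)
    hyper_model.M_step(X, Y, Z, {})

    # After training, the predicted weights should follow the assignments.
    pi = hyper_model.PredictPi(X, {})
    accuracy = (pi.argmax(dim=-1) == Z.argmax(dim=-1)).float().mean()
    print(accuracy)  # expected to be close to 1.0 on this separable toy data
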

class HyperModelGateSparsed(HyperModel):
    r"""A hyper model for a mixture of models, where each :math:`i`-th
    object from the train dataset has its own probability vector
    :math:`\pi^i` over the local models.

    In this hyper model, the probability vector of the local models is
    drawn from a Dirichlet distribution with mean parameter :math:`\mu`
    and concentration :math:`\gamma`.

    :param output_dim: The number of local models.
    :type output_dim: int
    :param gamma: The concentration of the prior Dirichlet distribution.
        Default `1.`.
    :type gamma: float
    :param mu: The mean of the prior Dirichlet distribution (normalised
        in the constructor). Default `torch.ones(2)`.
    :type mu: FloatTensor
    :param device: The device for pytorch. Can be 'cpu' or 'cuda'.
        Default 'cpu'.

    Example:

    >>> _ = torch.random.manual_seed(42)  # Set random seed for repeatability
    >>>
    >>> w = torch.randn(2, 1)  # Generate real parameter vector
    >>> X = torch.randn(5, 2)  # Generate features data
    >>> Z = torch.distributions.dirichlet.Dirichlet(
    ...     torch.tensor([0.5, 0.5])).sample(
    ...         (5,))  # Set correspondence between data and local models
    >>> Y = X@w + 0.1*torch.randn(5, 1)  # Generate target data with noise 0.1
    >>>
    >>> hyper_model = HyperModelGateSparsed(
    ...     output_dim=2)  # Model with Dirichlet weighting for each sample
    >>> hyper_parameters = {}  # Without hyper parameters
    >>>
    >>> hyper_model.LogPiExpectation(
    ...     X, Y, hyper_parameters)  # Log of probability before E-step
    tensor([[-1.3863, -1.3863],
            [-1.3863, -1.3863],
            [-1.3863, -1.3863],
            [-1.3863, -1.3863],
            [-1.3863, -1.3863]])
    >>>
    >>> hyper_model.E_step(X, Y, Z, hyper_parameters)
    >>> hyper_model.LogPiExpectation(
    ...     X, Y, hyper_parameters)  # Log of probability after E-step
    tensor([[-1.9677, -0.4830],
            [-1.7785, -0.5417],
            [-0.5509, -1.7521],
            [-0.7250, -1.3642],
            [-0.4839, -1.9644]])
    """

    def __init__(self, output_dim=2, gamma=1., mu=torch.ones(2),
                 device='cpu'):
        """Constructor method.
        """
        super(HyperModelGateSparsed, self).__init__()
        self.output_dim = output_dim
        self.device = device

        self.mu = mu
        self.mu = self.mu / self.mu.sum()
        self.gamma = gamma

        self.mu_posterior = self.mu.clone()
        self.gamma_posterior = torch.tensor(self.gamma)

    def E_step(self, X, Y, Z, HyperParameters):
        r"""Performs the E-step of the EM-algorithm: finds the variational
        probability `q` of each object's model probabilities.

        Calculates the analytical solution for `q` in the class of
        Dirichlet distributions: for each object :math:`i`,
        :math:`q^i = Dir(m^i)` with :math:`m^i = \gamma\mu + Z_i`,
        where :math:`\gamma` and :math:`\mu` are the prior concentration
        and mean. The posterior is stored as the concentration
        :math:`\gamma^i_{post} = \sum_k m^i_k` and the mean
        :math:`\mu^i_{post} = m^i / \gamma^i_{post}`.

        :param X: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape `num_elements` :math:`\times`
            `num_answers`.
        :type Y: FloatTensor
        :param Z: The tensor of shape `num_elements` :math:`\times`
            `num_models`.
        :type Z: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters,
            where each `key` is a string and each `value` is a FloatTensor.
        :type HyperParameters: dict
        """
        gamma = Z
        self.mu_posterior = (self.gamma * self.mu + gamma).detach()
        self.gamma_posterior = self.mu_posterior.sum(dim=-1).view([-1, 1])
        self.mu_posterior = self.mu_posterior / self.gamma_posterior
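
    # Worked example (illustrative): with gamma = 1, mu = (0.5, 0.5) and a
    # single responsibility row Z_i = (0.2, 0.8), the unnormalised posterior
    # parameter is gamma * mu + Z_i = (0.7, 1.3). Its sum 2.0 becomes
    # gamma_posterior for that object, and mu_posterior = (0.35, 0.65).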

    def M_step(self, X, Y, Z, HyperParameters):
        r"""The method does nothing.

        :param X: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape `num_elements` :math:`\times`
            `num_answers`.
        :type Y: FloatTensor
        :param Z: The tensor of shape `num_elements` :math:`\times`
            `num_models`.
        :type Z: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters,
            where each `key` is a string and each `value` is a FloatTensor.
        :type HyperParameters: dict
        """
        pass

    def LogPiExpectation(self, X, Y, HyperParameters):
        r"""Returns the expected value of the logarithm of each model's
        probability for each object.

        Returns the expectation of :math:`\log \pi^i`, where :math:`\pi^i`
        is a random vector from the object's posterior Dirichlet
        distribution. The expectation is computed with the digamma
        function :math:`\psi`:
        :math:`\mathsf{E}\log \pi^i_k
        = \psi(\gamma^i_{post}\mu^i_{post,k}) - \psi(\gamma^i_{post})`.

        :param X: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape `num_elements` :math:`\times`
            `num_answers`.
        :type Y: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters,
            where each `key` is a string and each `value` is a FloatTensor.
        :type HyperParameters: dict

        :return: The tensor of shape `num_elements` :math:`\times`
            `num_models`. The expected value of each model's
            log-probability.
        :rtype: FloatTensor
        """
        temp_1 = torch.ones([X.shape[0], self.output_dim])
        temp_2 = (torch.digamma(self.gamma_posterior * self.mu_posterior)
                  - torch.digamma(self.gamma_posterior))
        return temp_1 * temp_2

    def PredictPi(self, X, HyperParameters):
        r"""Returns the probability (weight) of each model for each object.

        Returns the posterior mean :math:`\pi^i = \mu^i_{post}` for each
        object; after an E-step this vector generally differs between
        objects.

        :param X: The tensor of shape `num_elements` :math:`\times`
            `num_feature`.
        :type X: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters,
            where each `key` is a string and each `value` is a FloatTensor.
        :type HyperParameters: dict

        :return: The tensor of shape `num_elements` :math:`\times`
            `num_models`. The probability (weight) of each model.
        :rtype: FloatTensor
        """
        pi = torch.ones([X.shape[0], self.output_dim]) * self.mu_posterior
        return pi
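

# A minimal usage sketch (illustrative, not part of the library): the
# sparsed gate keeps a separate Dirichlet posterior per object, so
# PredictPi returns different weights for different objects after an
# E-step. The helper name `_demo_gate_sparsed` is hypothetical.
def _demo_gate_sparsed():
    _ = torch.random.manual_seed(42)
    X = torch.randn(5, 2)
    Y = torch.randn(5, 1)
    Z = torch.distributions.dirichlet.Dirichlet(
        torch.tensor([0.5, 0.5])).sample((5,))

    hyper_model = HyperModelGateSparsed(output_dim=2)
    hyper_model.E_step(X, Y, Z, {})

    # Per-object posterior mean: (gamma * mu + Z_i), normalised row-wise.
    m = 1. * torch.ones(2) / 2 + Z
    assert torch.allclose(hyper_model.mu_posterior,
                          m / m.sum(dim=-1, keepdim=True))
    print(hyper_model.PredictPi(X, {}))  # rows differ across objects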