cMHN 1.2
C++ library for learning MHNs with pRC
Loading...
Searching...
No Matches
cMHN Namespace Reference

Namespaces

namespace  nonTT
 
namespace  TT
 

Classes

class  KLScore
 Class storing a KL Score, specializes the Score class. More...
 
class  L1Regulator
 Class storing an L1 Regulator, specializes the Regulator class. More...
 
class  L2Regulator
 Class storing an L2 Regulator, specializes the Regulator class. More...
 
class  Regulator
 Class storing all relevant information for a regulator. More...
 
class  Score
 Class storing all relevant information for a score. More...
 
class  sKLdynScore
 Class storing a dynamic sKL-Divergence Score, specializes the Score class. More...
 
class  sKLScore
 Class storing a static sKL-Divergence Score, specializes the Score class. More...
 

Functions

template<class T , pRC::Size D>
decltype(expand(pRC::makeConstantSequence< pRC::Size, D, 2 >(), [](auto const ... Ns) { return pRC::Tensor< T, Ns... >{};})) calculatePTheta (nonTT::MHNOperator< T, D > const &op)
 Calculates the vector pTheta given a nonTT MHN Operator.
 
template<pRC::Size R, class T , class X , pRC::Size D>
X calculatePTheta (TT::MHNOperator< T, D > const &op, X const &pInit, T const &toleranceSolver)
 Calculates the vector pTheta (as a TT) given a TT MHN Operator, an initial value and a tolerance.
 
template<pRC::Size R, class T , pRC::Size D>
auto calculatePTheta (TT::MHNOperator< T, D > const &op, T const &toleranceSolver)
 Calculates the vector pTheta (as a TT) given a TT MHN Operator and a tolerance.
 
template<class T , pRC::Size D, class S >
T calculateScore (nonTT::MHNOperator< T, D > const &op, std::map< S, T > const &pD, cMHN::Score< T > const &Score, cMHN::Regulator< T, D > const &Regulator)
 Calculate score of a theta matrix given some data distribution pD.
 
template<pRC::Size RP, class T , pRC::Size D, class S >
T calculateScore (TT::MHNOperator< T, D > const &op, std::map< S, T > const &pD, cMHN::Score< T > const &Score, cMHN::Regulator< T, D > const &Regulator, T const &toleranceSolverP=1e-4)
 Calculate score of a theta matrix given some data distribution pD (using TTs)
 
template<class T , pRC::Size D, class S >
std::tuple< T, pRC::Tensor< T, D, D > > calculateScoreAndGradient (nonTT::MHNOperator< T, D > const &op, std::map< S, T > const &pD, cMHN::Score< T > const &Score, cMHN::Regulator< T, D > const &Regulator, T const &toleranceSolverQ=1e-8)
 Calculate score and gradient of a theta matrix given some data distribution pD.
 
template<pRC::Size RP, pRC::Size RQ, class T , pRC::Size D, class S , class X >
std::tuple< T, pRC::Tensor< T, D, D > > calculateScoreAndGradient (TT::MHNOperator< T, D > const &op, std::map< S, T > const &pD, cMHN::Score< T > const &Score, cMHN::Regulator< T, D > const &Regulator, X &pInit, T const &toleranceSolverP=1e-4, T const &toleranceSolverQ=1e-4)
 Calculate score and gradient of a theta matrix given some distribution pD (using TTs)
 
template<pRC::Size RP, pRC::Size RQ, class T , pRC::Size D, class S >
std::tuple< T, pRC::Tensor< T, D, D > > calculateScoreAndGradient (TT::MHNOperator< T, D > const &op, std::map< S, T > const &pD, cMHN::Score< T > const &Score, cMHN::Regulator< T, D > const &Regulator, T const &toleranceSolverP=1e-4, T const &toleranceSolverQ=1e-4)
 Calculate score and gradient of a theta matrix given some distribution pD (using TTs)
 
template<class T , class S >
static auto learnIndependenceModel (std::map< S, T > const &pD)
 Calculate the theta matrix corresponding to the independence model for a given data distribution.
 
template<class T , class S >
static auto combinePDs (std::vector< std::map< S, T > > const &pDs, std::vector< pRC::Index > const &lengths)
 Combines multiple data distributions into a single distribution.
 
template<class T , pRC::Size D, class F >
static auto generateData (pRC::RandomEngine< F > &rng, pRC::Tensor< T, D, D > const &smallThetaGT, pRC::Size const &size, std::string const &header, std::string const &filename)
 Generates a data file from a given ground truth model.
 
template<class T , pRC::Size D>
static auto generateData (pRC::Tensor< T, D, D > const &smallThetaGT, pRC::Size const &size, std::string const &header, std::string const &filename)
 Generates a data file from a given ground truth model.
 
template<class T , pRC::Size D, class F >
static auto generatePD (pRC::RandomEngine< F > &rng, pRC::Tensor< T, D, D > const &smallThetaGT, pRC::Size const &size)
 Generates a data distribution from a given ground truth model.
 
template<class T , pRC::Size D>
static auto generatePD (pRC::Tensor< T, D, D > const &smallThetaGT, pRC::Size const &size)
 Generates a data distribution from a given ground truth model.
 
template<pRC::Size D, class T = pRC::Float<>, class F >
static auto generateTheta (pRC::RandomEngine< F > &rng, pRC::Float<> const &fullness=0.5, T const &diagonalMean=-2, T const &diagonalStd=2, T const &offDiagonalMean=0, T const &offDiagonalB=0.75)
 Generates a random theta matrix according to given distributions, and with given fullness.
 
template<pRC::Size D, class T = pRC::Float<>>
static auto generateTheta (pRC::Float<> const &fullness=0.5, T const &diagonalMean=-2, T const &diagonalStd=2, T const &offDiagonalMean=0, T const &offDiagonalB=0.75)
 Generates a random theta matrix according to given distributions, and with given fullness.
 
template<class T , pRC::Size D>
static auto getCrossValSplits (std::string const &filename, pRC::Index const &k)
 Splits the samples in a dataset into k sets of as equal size as possible, which can be used for k-fold cross-validated training.
 
template<class T , pRC::Size D>
static auto readData (std::string const &filename)
 Reads a dataset from file, where the first line is the header (containing event names) and all subsequent lines contain one sample, with entries separated by commas or spaces.
 
static auto readHeader (std::string const &filename)
 Reads the header of a dataset from file.
 
template<class T , pRC::Size D>
static auto readTheta (std::string const &filename)
 Reads a theta matrix from file, discarding its header.
 
template<class S >
static auto getRandomSubscripts ()
 Generates a uniformly distributed random Subscripts element for a Tensor or a TT.
 
template<class T , pRC::Size D>
static auto writeTheta (std::string const &filename, std::string const &header, pRC::Tensor< T, D, D > const &theta, std::map< std::string, std::string > const &logInfoNames={}, std::map< std::string, double > const &logInfoNumbers={})
 Writes a theta matrix to file, including additional logging information at the bottom.
 

Function Documentation

◆ calculatePTheta() [1/3]

template<class T , pRC::Size D>
decltype(expand(pRC::makeConstantSequence< pRC::Size, D, 2 >(),[](auto const ... Ns) { return pRC::Tensor< T, Ns... >{}; })) cMHN::calculatePTheta ( nonTT::MHNOperator< T, D > const & op)

Calculates the vector pTheta given a nonTT MHN Operator.

This solves the linear equation (1-Q)*pTheta = p0, where Q is an MHN Operator and p0 is the initial distribution, i.e. (100%, 0%, ..., 0%). This operation is exact, so no tolerance can be provided.

Template Parameters
Ttype used for calculations (inferred)
Dnumber of events of the dataset (inferred)
Parameters
opnonTT MHN Operator constructed from a theta matrix

◆ calculatePTheta() [2/3]

template<pRC::Size R, class T , pRC::Size D>
auto cMHN::calculatePTheta ( TT::MHNOperator< T, D > const & op,
T const & toleranceSolver )

Calculates the vector pTheta (as a TT) given a TT MHN Operator and a tolerance.

This solves the linear equation (1-Q)*pTheta = p0, where Q is an MHN Operator and p0 is the initial distribution, i.e. (100%, 0%, ..., 0%).

Template Parameters
Rmax TT rank of pTheta
Ttype used for calculations (inferred)
Dnumber of events of the dataset (inferred)
Parameters
opTT MHN Operator constructed from a theta matrix
toleranceSolvertolerance passed to the solver of the linear system

◆ calculatePTheta() [3/3]

template<pRC::Size R, class T , class X , pRC::Size D>
X cMHN::calculatePTheta ( TT::MHNOperator< T, D > const & op,
X const & pInit,
T const & toleranceSolver )

Calculates the vector pTheta (as a TT) given a TT MHN Operator, an initial value and a tolerance.

This solves the linear equation (1-Q)*pTheta = p0, where Q is an MHN Operator and p0 is the initial distribution, i.e. (100%, 0%, ..., 0%).

Template Parameters
Rmax TT rank of pTheta
Ttype used for calculations (inferred)
Xclass of pTheta (inferred)
Dnumber of events of the dataset (inferred)
Parameters
opTT MHN Operator constructed from a theta matrix
pInitinitial value for linear equation
toleranceSolvertolerance passed to the solver of the linear system

◆ calculateScore() [1/2]

template<class T , pRC::Size D, class S >
T cMHN::calculateScore ( nonTT::MHNOperator< T, D > const & op,
std::map< S, T > const & pD,
cMHN::Score< T > const & Score,
cMHN::Regulator< T, D > const & Regulator )

Calculate score of a theta matrix given some data distribution pD.

Template Parameters
Ttype used for calculations (inferred)
Dnumber of events of the dataset (inferred)
SSubscripts type used as keys in data distributions (inferred)
Parameters
opMHNOperator to calculate the score of
pDdata distribution
ScoreScore to use
RegulatorRegulator to use

◆ calculateScore() [2/2]

template<pRC::Size RP, class T , pRC::Size D, class S >
T cMHN::calculateScore ( TT::MHNOperator< T, D > const & op,
std::map< S, T > const & pD,
cMHN::Score< T > const & Score,
cMHN::Regulator< T, D > const & Regulator,
T const & toleranceSolverP = 1e-4 )

Calculate score of a theta matrix given some data distribution pD (using TTs)

Template Parameters
RPmax TT rank of pTheta
Ttype used for calculations (inferred)
Dnumber of events of the dataset (inferred)
SSubscripts type used as keys in data distributions (inferred)
Parameters
opMHNOperator to calculate the score of
pDdata distribution
ScoreScore to use
RegulatorRegulator to use
toleranceSolverPtolerance to use when calculating pTheta (default: 1e-4)

◆ calculateScoreAndGradient() [1/3]

template<class T , pRC::Size D, class S >
std::tuple< T, pRC::Tensor< T, D, D > > cMHN::calculateScoreAndGradient ( nonTT::MHNOperator< T, D > const & op,
std::map< S, T > const & pD,
cMHN::Score< T > const & Score,
cMHN::Regulator< T, D > const & Regulator,
T const & toleranceSolverQ = 1e-8 )

Calculate score and gradient of a theta matrix given some data distribution pD.

Template Parameters
Ttype used for calculations (inferred)
Dnumber of events of the dataset (inferred)
SSubscripts type used as keys in data distributions (inferred)
Parameters
opMHNOperator to calculate score and gradient of
pDdata distribution
ScoreScore to use
RegulatorRegulator to use
toleranceSolverQtolerance to use when calculating q (default: 1e-8)

◆ calculateScoreAndGradient() [2/3]

template<pRC::Size RP, pRC::Size RQ, class T , pRC::Size D, class S >
std::tuple< T, pRC::Tensor< T, D, D > > cMHN::calculateScoreAndGradient ( TT::MHNOperator< T, D > const & op,
std::map< S, T > const & pD,
cMHN::Score< T > const & Score,
cMHN::Regulator< T, D > const & Regulator,
T const & toleranceSolverP = 1e-4,
T const & toleranceSolverQ = 1e-4 )

Calculate score and gradient of a theta matrix given some distribution pD (using TTs)

Template Parameters
RPmax TT rank of pTheta
RQmax TT rank of q
Ttype used for calculations (inferred)
Dnumber of events of the dataset (inferred)
SSubscripts type used as keys in data distributions (inferred)
Parameters
opMHNOperator to calculate score and gradient of
pDdata distribution
ScoreScore to use
RegulatorRegulator to use
toleranceSolverPtolerance to use when calculating pTheta (default: 1e-4)
toleranceSolverQtolerance to use when calculating q (default: 1e-4)

◆ calculateScoreAndGradient() [3/3]

template<pRC::Size RP, pRC::Size RQ, class T , pRC::Size D, class S , class X >
std::tuple< T, pRC::Tensor< T, D, D > > cMHN::calculateScoreAndGradient ( TT::MHNOperator< T, D > const & op,
std::map< S, T > const & pD,
cMHN::Score< T > const & Score,
cMHN::Regulator< T, D > const & Regulator,
X & pInit,
T const & toleranceSolverP = 1e-4,
T const & toleranceSolverQ = 1e-4 )

Calculate score and gradient of a theta matrix given some distribution pD (using TTs)

pInit is used as the starting point of the calculation of pTheta. Beware that it is updated in place to the solution pTheta after it is obtained.

Template Parameters
RPmax TT rank of pTheta
RQmax TT rank of q
Ttype used for calculations (inferred)
Dnumber of events of the dataset (inferred)
SSubscripts type used as keys in data distributions (inferred)
Xclass of pInit (inferred)
Parameters
opMHNOperator to calculate score and gradient of
pDdata distribution
ScoreScore to use
RegulatorRegulator to use
pInitinitial value for linear equation
toleranceSolverPtolerance to use when calculating pTheta (default: 1e-4)
toleranceSolverQtolerance to use when calculating q (default: 1e-4)

◆ combinePDs()

template<class T , class S >
static auto cMHN::combinePDs ( std::vector< std::map< S, T > > const & pDs,
std::vector< pRC::Index > const & lengths )
inlinestatic

Combines multiple data distributions into a single distribution.

Template Parameters
Ttype used for calculations (inferred)
SSubscripts type used as keys in data distribution (inferred)
Parameters
pDsdata distributions to combine

◆ generateData() [1/2]

template<class T , pRC::Size D, class F >
static auto cMHN::generateData ( pRC::RandomEngine< F > & rng,
pRC::Tensor< T, D, D > const & smallThetaGT,
pRC::Size const & size,
std::string const & header,
std::string const & filename )
inlinestatic

Generates a data file from a given ground truth model.

Template Parameters
Ttype used for calculations (inferred)
Dnumber of events of the dataset (inferred)
Fbase class of pRC::RandomEngine (inferred)
Parameters
rngRandomEngine to use for generating random numbers (needed not to get the same rng every time)
smallThetaGTsmall theta matrix of ground truth model (pRC::Tensor<T, D, D> object)
sizenumber of samples to take into account when constructing the data distribution
headerheader of the output file, i.e. event names
filenameoutput filename

◆ generateData() [2/2]

template<class T , pRC::Size D>
static auto cMHN::generateData ( pRC::Tensor< T, D, D > const & smallThetaGT,
pRC::Size const & size,
std::string const & header,
std::string const & filename )
inlinestatic

Generates a data file from a given ground truth model.

Template Parameters
Ttype used for calculations (inferred)
Dnumber of events of the dataset (inferred)
Parameters
smallThetaGTsmall theta matrix of ground truth model (pRC::Tensor<T, D, D> object)
sizenumber of samples to take into account when constructing the data distribution
headerheader of the output file, i.e. event names
filenameoutput filename

◆ generatePD() [1/2]

template<class T , pRC::Size D, class F >
static auto cMHN::generatePD ( pRC::RandomEngine< F > & rng,
pRC::Tensor< T, D, D > const & smallThetaGT,
pRC::Size const & size )
inlinestatic

Generates a data distribution from a given ground truth model.

Template Parameters
Ttype used for calculations (inferred)
Dnumber of events of the dataset (inferred)
Fbase class of pRC::RandomEngine (inferred)
Parameters
rngRandomEngine to use for generating random numbers (needed not to get the same rng every time)
smallThetaGTsmall theta matrix of ground truth model (pRC::Tensor<T, D, D> object)
sizenumber of samples to take into account when constructing the data distribution

◆ generatePD() [2/2]

template<class T , pRC::Size D>
static auto cMHN::generatePD ( pRC::Tensor< T, D, D > const & smallThetaGT,
pRC::Size const & size )
inlinestatic

Generates a data distribution from a given ground truth model.

Template Parameters
Ttype used for calculations (inferred)
Dnumber of events of the dataset (inferred)
Parameters
smallThetaGTsmall theta matrix of ground truth model (pRC::Tensor<T, D, D> object)
sizenumber of samples to take into account when constructing the data distribution
toleranceSolverPtolerance passed to the solver calculating pTheta (stale entry: this overload's signature has no toleranceSolverP parameter)

◆ generateTheta() [1/2]

template<pRC::Size D, class T = pRC::Float<>>
static auto cMHN::generateTheta ( pRC::Float<> const & fullness = 0.5,
T const & diagonalMean = -2,
T const & diagonalStd = 2,
T const & offDiagonalMean = 0,
T const & offDiagonalB = 0.75 )
inlinestatic

Generates a random theta matrix according to given distributions, and with given fullness.

This function is used when no pRC::RandomEngine is passed.

Diagonal entries of the theta matrix (small thetas!) are generated using a Gaussian distribution, whose mean and standard deviation can be specified. Off diagonal entries are generated using a Laplace distribution, whose mean and b-parameter can be specified. The distribution's standard deviation is given by sqrt(2)*b.

Template Parameters
Dnumber of events of the dataset
Ttype used for calculations (inferred)
Parameters
fullnessPercentage of entries in theta matrix that are allowed to differ from 0 (stochastically)
diagonalMeanmean of the distribution generating the diagonal entries
diagonalStdstandard deviation of the distribution generating the diagonal entries
offDiagonalMeanmean of the distribution generating the off diagonal entries
offDiagonalBB parameter of the distribution generating the off diagonal entries

◆ generateTheta() [2/2]

template<pRC::Size D, class T = pRC::Float<>, class F >
static auto cMHN::generateTheta ( pRC::RandomEngine< F > & rng,
pRC::Float<> const & fullness = 0.5,
T const & diagonalMean = -2,
T const & diagonalStd = 2,
T const & offDiagonalMean = 0,
T const & offDiagonalB = 0.75 )
inlinestatic

Generates a random theta matrix according to given distributions, and with given fullness.

Diagonal entries of the theta matrix (small thetas!) are generated using a Gaussian distribution, whose mean and standard deviation can be specified. Off diagonal entries are generated using a Laplace distribution, whose mean and b-parameter can be specified. The distribution's standard deviation is given by sqrt(2)*b.

Template Parameters
Dnumber of events of the dataset
Ttype used for calculations (inferred)
Fbase class of pRC::RandomEngine (inferred)
Parameters
rngRandomEngine to use for generating random numbers (needed not to get the same rng every time)
fullnessPercentage of entries in theta matrix that are allowed to differ from 0 (stochastically)
diagonalMeanmean of the distribution generating the diagonal entries
diagonalStdstandard deviation of the distribution generating the diagonal entries
offDiagonalMeanmean of the distribution generating the off diagonal entries
offDiagonalBB parameter of the distribution generating the off diagonal entries

◆ getCrossValSplits()

template<class T , pRC::Size D>
static auto cMHN::getCrossValSplits ( std::string const & filename,
pRC::Index const & k )
inlinestatic

Splits the samples in a dataset into k sets of as equal size as possible, which can be used for k-fold cross-validated training.

Template Parameters
Ttype used for calculations (usually pRC::Float<>)
Dnumber of events of the dataset
Parameters
filenamename of the file containing the dataset
knumber of equally sized sets to produce

◆ getRandomSubscripts()

template<class S >
static auto cMHN::getRandomSubscripts ( )
inlinestatic

Generates a uniformly distributed random Subscripts element for a Tensor or a TT.

Template Parameters
SSubscripts Class to generate random element of

◆ learnIndependenceModel()

template<class T , class S >
static auto cMHN::learnIndependenceModel ( std::map< S, T > const & pD)
inlinestatic

Calculate the theta matrix corresponding to the independence model for a given data distribution.

Template Parameters
Ttype used for calculations (inferred)
SSubscripts type used as keys in data distributions (inferred)
Parameters
pDdata distribution

◆ readData()

template<class T , pRC::Size D>
static auto cMHN::readData ( std::string const & filename)
inlinestatic

Reads a dataset from file, where the first line is the header (containing event names) and all subsequent lines contain one sample, with entries separated by commas or spaces.

Template Parameters
Ttype used for calculations (usually pRC::Float<>)
Dnumber of events of the dataset
Parameters
filenamename of the file containing the dataset

◆ readHeader()

static auto cMHN::readHeader ( std::string const & filename)
inlinestatic

Reads the header of a dataset from file.

Parameters
filenamename of the file to read

◆ readTheta()

template<class T , pRC::Size D>
static auto cMHN::readTheta ( std::string const & filename)
inlinestatic

Reads a theta matrix from file, discarding its header.

Template Parameters
Ttype used for calculations
Dnumber of events of the dataset
Parameters
filenamename of the file to read from

◆ writeTheta()

template<class T , pRC::Size D>
static auto cMHN::writeTheta ( std::string const & filename,
std::string const & header,
pRC::Tensor< T, D, D > const & theta,
std::map< std::string, std::string > const & logInfoNames = {},
std::map< std::string, double > const & logInfoNumbers = {} )
inlinestatic

Writes a theta matrix to file, including additional logging information at the bottom.

Template Parameters
Ttype used for calculations (inferred)
Dnumber of events of the dataset (inferred)
Parameters
filenamename of the file to write to
headerheader of the input file, i.e. event names
thetatheta matrix
logInfoNameslogging information as a map string -> string (usually used for score and regulator names)
logInfoNumberslogging information as a map string -> double (usually used for score, time, etc.)