[57223f]: src / modules / glm / samplers / GLMMethod.h  Maximize  Restore  History

Download this file

203 lines (194 with data), 7.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#ifndef GLM_METHOD_H_
#define GLM_METHOD_H_
#include <sampler/SampleMethod.h>
#include <sampler/GraphView.h>
#include <string>
#include <vector>
extern "C" {
#include <cholmod.h>
}
class Graph;
class RNG;
class StochasticNode;
/* Outcome-distribution families recognized by GLMMethod::getFamily().
   NOTE(review): GLM_UNKNOWN presumably marks distributions that the GLM
   samplers cannot handle — confirm against getFamily's implementation. */
enum GLMFamily {GLM_NORMAL, GLM_BERNOULLI, GLM_BINOMIAL, GLM_POISSON,
GLM_UNKNOWN};
namespace glm {
/**
 * @short Abstract class for sampling generalized linear models.
 *
 * GLMMethod provides a base class for sampling methods that work
 * on generalized linear models (GLMs). Most of the update
 * machinery is provided by GLMMethod itself. Sub-classes have to
 * define member functions that define the inputs to the sampling
 * algorithm.
 *
 * Classes inheriting from GLMMethod typically use auxiliary
 * variable sampling. This is a form of data augmentation in
 * which additional latent (auxiliary) variables are added to the
 * model to reduce it to the form of a linear model with normal
 * outcome variables. We also assume that the regression
 * parameters have a prior multivariate normal distribution. Thus
 * the posterior distribution is also multivariate normal and
 * the regression parameters can be sampled efficiently in a block.
 *
 * GLMMethod uses sparse matrix algebra provided by the libraries
 * CHOLMOD and CSparse. In the context of a hierarchical model,
 * mixed models appear identical to fixed-effect models except
 * that mixed models have a design matrix that is sparse. The use
 * of CHOLMOD and CSparse, along with auxiliary variable sampling,
 * allows us to handle both fixed and random effects in a
 * consistent way without needing to distinguish between them, or
 * rely on asymptotic approximations.
 */
class GLMMethod : public SampleMethod {
    // Pointers to the linear predictors of the outcome variables
    // (filled in by getMean; exact population site not visible here —
    // TODO confirm against GLMMethod.cc).
    std::vector<double const *> _lp;
protected:
    GraphView const *_view;   // View of all sampled nodes.
    unsigned int _chain;      // Index of the chain being updated (from 0).
    std::vector<GraphView const *> _sub_views; // One view per sampled node.
    cholmod_sparse *_x;       // Presumably the sparse design matrix — see calDesign.
    cholmod_factor *_factor;  // Cholesky factorization workspace (see symbolic).
private:
    std::vector<bool> _fixed;
    unsigned int _length_max;
    unsigned int _nz_prior;   // was "unsigned": now consistent with the other members
    bool _init;
    // Performs the symbolic Cholesky analysis of the posterior precision.
    void symbolic();
    // Recalculates the design matrix entries.
    void calDesign() const;
public:
    /**
     * Constructor.
     *
     * @param view Pointer to a GraphView object for all sampled nodes.
     *
     * @param sub_views Vector of pointers to GraphView objects with
     * length equal to the number of sampled nodes. Each sub-view
     * corresponds to a single sampled node.
     *
     * @param chain Number of the chain (starting from 0) to which
     * the sampling method will be applied.
     *
     * @param link Boolean flag that is passed to the utility
     * function checkLinear when checking to see if we have a
     * valid GLM. If link is true then the last deterministic
     * descendents in view (i.e. those with no deterministic
     * descendants) may be link nodes.
     */
    GLMMethod(GraphView const *view,
              std::vector<GraphView const *> const &sub_views,
              unsigned int chain, bool link);
    /**
     * Virtual destructor
     */
    virtual ~GLMMethod();
    /**
     * Updates the regression parameters by treating the GLM as a
     * linear model (LM), either by a linear approximation or by
     * using auxiliary variables. All regression parameters are
     * updated together in a block.
     *
     * The updateLM function relies on virtual functions which are
     * implemented by sub-classes: getPrecision, getValue and
     * updateAuxiliary.
     *
     * In order to provide more flexibility, updateLM has optional
     * arguments stochastic and chol.
     *
     * @param rng Random number generator used for sampling
     *
     * @param stochastic Logical flag. If true then updateLM draws
     * a sample from the posterior distribution of the regression
     * parameters. If false then it sets the regression parameters
     * to their posterior mean.
     *
     * @param chol Logical flag controlling the point at which
     * updateAuxiliary is called: if true, updateAuxiliary is
     * called after solving for the posterior mean; if false, it
     * is called before solving, on the canonical parameter "b".
     * See updateAuxiliary for details.
     */
    void updateLM(RNG *rng, bool stochastic = true, bool chol = true);
    /**
     * Updates the regression parameters element-wise (i.e. with
     * Gibbs sampling). Although Gibbs sampling less efficient
     * than the block-updating provided by updateLM, it is more
     * widely applicable. In particular, if the regression
     * parameters have a truncated normal prior, the model is
     * still amenable to Gibbs sampling.
     */
    void updateLMGibbs(RNG *rng);
    /**
     * Calculates the coefficients of the posterior distribution
     * of the regression parameters. GLMMethod uses a canonical
     * parametrization (b, A) such that "A" is the posterior
     * precision of the parameters and the posterior mean "mu"
     * solves (A %*% mu = b).
     *
     * @param b Dense vector such that (b = A %*% mu), where "mu"
     * is the posterior mean and "A" is the posterior precision.
     *
     * @param A Posterior precision represented as a sparse matrix.
     */
    void calCoef(double *&b, cholmod_sparse *&A);
    /**
     * Returns the linear predictor for the outcome variable with index i.
     */
    double getMean(unsigned int i) const;
    /**
     * Returns the precision of the outcome variable with index i.
     * This may be the precision parameter of an auxiliary variable.
     */
    virtual double getPrecision(unsigned int i) const = 0;
    /**
     * Returns the value of the outcome variable with index i.
     * This may be an auxiliary variable, rather than the observed
     * outcome.
     */
    virtual double getValue(unsigned int i) const = 0;
    /**
     * Updates auxiliary variables. The default method does
     * nothing. Sampling methods that use auxiliary variables to
     * reduce the GLM to a linear model must provide their own
     * implementation.
     *
     * This function is called by updateLM. Internally, updateLM
     * calculates the posterior mean "mu" by solving the equation
     * (A %*% mu = b) where "A" is the posterior precision. The
     * same dense vector is used to hold "b" (before solving the
     * equation) and "mu" (after solving the equation). If
     * updateLM is called with parameter "chol" set to false then
     * updateAuxiliary is called before solving the equation: thus
     * the first argument (y) should contain the canonical
     * parameter (b). If updateLM is called with "chol" set to
     * true then updateAuxiliary is called after solving the
     * equation: thus the first argument (y) should contain the
     * posterior mean.
     *
     * @param y Dense vector which may be either the posterior
     * mean "mu" or (A %*% mu), where "A" is the posterior
     * precision.
     *
     * @param N Cholesky factorization of the posterior precision "A".
     *
     * @param rng Random number generator used for sampling.
     */
    virtual void updateAuxiliary(cholmod_dense *y, cholmod_factor *N,
                                 RNG *rng);
    /**
     * Returns false. Sampling methods inheriting from GLMMethod
     * are not adaptive.
     */
    bool isAdaptive() const;
    /**
     * Does nothing, as GLMMethod is not adaptive.
     */
    void adaptOff();
    /**
     * Returns true, as GLMMethod is not adaptive.
     */
    bool checkAdaptation() const;
    /**
     * Utility function that classifies the distribution of a
     * stochastic node into one of the classes defined by the
     * enumeration GLMFamily.
     */
    static GLMFamily getFamily(StochasticNode const *snode);
};
}
#endif /* GLM_METHOD_H_ */

Get latest updates about Open Source Projects, Conferences and News.

Sign up for the SourceForge newsletter:





No, thanks