This may happen on the first request due to CSS mimetype issues. Try clearing your browser cache and refreshing.

--- a/src/modules/glm/samplers/GLMMethod.h +++ b/src/modules/glm/samplers/GLMMethod.h @@ -21,7 +21,31 @@ namespace glm { /** - * Abstract method for generalized linear models. + * @short Abstract class for sampling generalized linear models. + * + * GLMMethod provides a base class for sampling methods that work + * on generalized linear models (GLMs). Most of the update + * machinery is provided by GLMMethod itself. Sub-classes have to + * define member functions that define the inputs to the sampling + * algorithm. + * + * Classes inheriting from GLMMethod typically use auxiliary + * variable sampling. This is a form of data augmentation in + * which additional latent (auxiliary) variables are added to the + * model to reduce it to the form of a linear model with normal + * outcome variables. We also assume that that the regression + * parameters have a prior multivariate normal distribution. Thus + * the posterior distribution is also multivariate normal and the + * the regression parameters can be sampled efficiently in a block. + * + * GLMMethod uses sparse matrix algebra provided by the libraries + * CHOLMOD and CSparse. In the context of a hierarchical model, + * mixed models appear identical to fixed-effect models except + * that mixed models have a design matrix that is sparse. The use + * of CHOLMOD and CSparse, along with auxiliary variable sampling, + * allows us to handle both fixed and random effects in a + * consistent way without needing to distinguish between them, or + * rely on asymptotic approximations. */ class GLMMethod : public SampleMethod { std::vector<double const *> _lp; @@ -39,21 +63,141 @@ void symbolic(); void calDesign() const; public: + /** + * Constructor. + * + * @param view Pointer to a GraphView object for all sampled nodes. + * + * @param sub_views Vector of pointers to GraphView objects with + * length equal to the number of sampled nodes. Each sub-view + * corresponds to a single sampled node. + * + * @param chain Number of the chain (starting from 0) to which + * the sampling method will be applied. + * + * @param link Boolean flag that is passed to the utility + * function checkLinear when checking to see if we have a + * valid GLM. If link is true then the last deterministic + * descendents in view (i.e. those with no deterministic + * descendants) may be link nodes. + */ GLMMethod(GraphView const *view, std::vector<GraphView const *> const &sub_views, unsigned int chain, bool link); + /** + * Virtual destructor + */ virtual ~GLMMethod(); - void updateLM(RNG *rng, bool stochastic = true, bool chol=true); + /** + * Updates the regression parameters by treating the GLM as a + * linear model (LM), either by a linear approximation or by + * using auxiliary variables. All regression parameters are + * updated together in a block. + * + * The updateLM function relies on virtual functions which are + * implemented by sub-classes: getPrecision, getValue and + * updateAuxiliary. + * + * In order to provide more flexibility, updateLM has an optional + * arguments stochastic. + * + * @param rng Random number generator used for sampling + * + * @param stochastic Logical flag. If true then updateLM draws + * a sample from the posterior distribution of the regression + * parameters. If false then it sets the regression parameters + * to their posterior mean. + */ + void updateLM(RNG *rng, bool stochastic = true); + /** + * Updates the regression parameters element-wise (i.e. with + * Gibbs sampling). Although Gibbs sampling less efficient + * than the block-updating provided by updateLM, it is more + * widely applicable. In particular, if the regression + * parameters have a truncated normal prior, the model is + * still amenable to Gibbs sampling. + */ void updateLMGibbs(RNG *rng); + /** + * Calculates the coefficients of the posterior distribution + * of the regression parameters. GLMMethod uses a canonical + * parametrization (b, A) such that "A" is the posterior + * precision of the parameters and the posterior mean "mu" + * solves (A %*% mu = b). + * + * @param b Dense vector such that (b = A %*% mu), where "mu" + * is the posterior mean and "A" is the posterior precision. + * + * @param A Posterior precision represented as a sparse matrix. + */ + void calCoef(double *&b, cholmod_sparse *&A); + /** + * Returns the linear predictor for the outcome variable with index i. + */ + double getMean(unsigned int i) const; + /** + * Returns the precision of the outcome variable with index i. + * This may be the precision parameter of an auxiliary variable. + */ + virtual double getPrecision(unsigned int i) const = 0; + /** + * Returns the value of the outcome variable with index i. + * This may be an auxiliary variable, rather than the observed + * outcome. + */ + virtual double getValue(unsigned int i) const = 0; + /** + * Updates auxiliary variables. The default method does + * nothing. Sampling methods that use auxiliary variables to + * reduce the GLM to a linear model must provide their own + * implementation. + * + * This function is called by updateLM. Internally, updateLM + * calculates the posterior mean "mu" by solving the equation + * (A %*% mu = b) where "A" is the posterior precision. The + * same dense vector is used to hold "b" (before solving the + * equation) and "mu" (after solving the equation). If + * updateLM is called with parameter "chol" set to false then + * updateAuxiliary is called before solving the equation: thus + * the first argument (y) should contain the canonical + * parameter (b). If updateLM is called with "chol" set to + * true then updateAuxiliary is called after solving the + * equation: thus the first argument (y) should contain the + * posterior mean. + * + * IMPORTANT NOTE: GLMMethod uses a parameterization in which + * the current value of the parameters is considered the + * origin. The value of "y" (mu or b) may need to be adjusted + * for this centring by an implementation of updateAuxiliary. + * + * @param y Dense vector which may be either the posterior + * mean "mu" or (A %*% mu), where "A" is the posterior + * precision. + * + * @param N Cholesky factorization of the posterior precision "A". + * + * @param rng Random number generator used for sampling. + */ + virtual void updateAuxiliary(cholmod_dense *y, cholmod_factor *N, + RNG *rng); + /** + * Returns false. Sampling methods inheriting from GLMMethod + * are not adaptive. + */ bool isAdaptive() const; + /** + * Does nothing, as GLMMethod is not adaptive. + */ void adaptOff(); + /** + * Returns true, as GLMMethod is not adaptive + */ bool checkAdaptation() const; - void calCoef(double *&, cholmod_sparse *&); - virtual double getMean(unsigned int i) const; - virtual std::string name() const = 0; - virtual double getPrecision(unsigned int i) const = 0; - virtual double getValue(unsigned int i) const = 0; - virtual void updateAuxiliary(cholmod_dense *b, cholmod_factor *N, RNG *rng); + /** + * Utility function that classifies the distribution of a + * stochastic node into one of the classes defined by the + * enumeration GLMFamily. + */ static GLMFamily getFamily(StochasticNode const *snode); };