Diff of /src/modules/glm/samplers/GLMMethod.h [01a998] .. [081fbe] Maximize Restore

  Switch to unified view

a/src/modules/glm/samplers/GLMMethod.h b/src/modules/glm/samplers/GLMMethod.h
...
...
19
        GLM_UNKNOWN};
19
        GLM_UNKNOWN};
20
20
21
namespace glm {
21
namespace glm {
22
22
23
    /**
23
    /**
24
     * Abstract method for generalized linear models.
24
     * @short Abstract class for sampling generalized linear models.
25
     *
26
     * GLMMethod provides a base class for sampling methods that work
27
     * on generalized linear models (GLMs).  Most of the update
28
     * machinery is provided by GLMMethod itself. Sub-classes have to
29
     * define member functions that define the inputs to the sampling
30
     * algorithm.
31
     *
32
     * Classes inheriting from GLMMethod typically use auxiliary
33
     * variable sampling.  This is a form of data augmentation in
34
     * which additional latent (auxiliary) variables are added to the
35
     * model to reduce it to the form of a linear model with normal
36
     * outcome variables.  We also assume that that the regression
37
     * parameters have a prior multivariate normal distribution.  Thus
38
     * the posterior distribution is also multivariate normal and the
39
     * the regression parameters can be sampled efficiently in a block.
40
     *
41
     * GLMMethod uses sparse matrix algebra provided by the libraries
42
     * CHOLMOD and CSparse. In the context of a hierarchical model,
43
     * mixed models appear identical to fixed-effect models except
44
     * that mixed models have a design matrix that is sparse.  The use
45
     * of CHOLMOD and CSparse, along with auxiliary variable sampling,
46
     * allows us to handle both fixed and random effects in a
47
     * consistent way without needing to distinguish between them, or
48
     * rely on asymptotic approximations.
25
     */
49
     */
26
    class GLMMethod : public SampleMethod {
50
    class GLMMethod : public SampleMethod {
27
    std::vector<double const *> _lp;
51
    std::vector<double const *> _lp;
28
    protected:
52
    protected:
29
    GraphView const *_view;
53
    GraphView const *_view;
...
...
37
    unsigned _nz_prior;
61
    unsigned _nz_prior;
38
    bool _init;
62
    bool _init;
39
    void symbolic();
63
    void symbolic();
40
    void calDesign() const;
64
    void calDesign() const;
41
    public:
65
    public:
66
  /**
67
   * Constructor.
68
   *
69
   * @param view Pointer to a GraphView object for all sampled nodes.
70
   *
71
   * @param sub_views Vector of pointers to GraphView objects with
72
   * length equal to the number of sampled nodes. Each sub-view
73
   * corresponds to a single sampled node.
74
   *
75
   * @param chain Number of the chain (starting from 0) to which
76
   * the sampling method will be applied.
77
   *
78
   * @param link Boolean flag that is passed to the utility
79
   * function checkLinear when checking to see if we have a
80
   * valid GLM. If link is true then the last deterministic
81
   * descendents in view (i.e. those with no deterministic
82
   * descendants) may be link nodes.
83
   */
42
    GLMMethod(GraphView const *view, 
84
    GLMMethod(GraphView const *view, 
43
          std::vector<GraphView const *> const &sub_views,
85
          std::vector<GraphView const *> const &sub_views,
44
          unsigned int chain, bool link);
86
          unsigned int chain, bool link);
87
  /**
88
   * Virtual destructor
89
   */
45
    virtual ~GLMMethod();
90
    virtual ~GLMMethod();
91
  /**
92
   * Updates the regression parameters by treating the GLM as a
93
   * linear model (LM), either by a linear approximation or by
94
   * using auxiliary variables. All regression parameters are
95
   * updated together in a block.
96
   *
97
   * The updateLM function relies on virtual functions which are
98
   * implemented by sub-classes: getPrecision, getValue and
99
   * updateAuxiliary.
100
   *
101
   * In order to provide more flexibility, updateLM has an optional
102
   * arguments stochastic.
103
   *
104
   * @param rng Random number generator used for sampling
105
   *
106
   * @param stochastic Logical flag. If true then updateLM draws
107
   * a sample from the posterior distribution of the regression
108
   * parameters. If false then it sets the regression parameters
109
   * to their posterior mean.
110
   */
46
    void updateLM(RNG *rng, bool stochastic = true, bool chol=true);
111
    void updateLM(RNG *rng, bool stochastic = true);
112
  /**
113
   * Updates the regression parameters element-wise (i.e. with
114
   * Gibbs sampling).  Although Gibbs sampling less efficient
115
   * than the block-updating provided by updateLM, it is more
116
   * widely applicable.  In particular, if the regression
117
   * parameters have a truncated normal prior, the model is
118
   * still amenable to Gibbs sampling.
119
   */
47
    void updateLMGibbs(RNG *rng);
120
    void updateLMGibbs(RNG *rng);
121
  /**
122
   * Calculates the coefficients of the posterior distribution
123
   * of the regression parameters. GLMMethod uses a canonical
124
   * parametrization (b, A) such that "A" is the posterior
125
   * precision of the parameters and the posterior mean "mu"
126
   * solves (A %*% mu = b).
127
   *
128
   * @param b Dense vector such that (b = A %*% mu), where "mu"
129
   * is the posterior mean and "A" is the posterior precision.
130
   *
131
   * @param A Posterior precision represented as a sparse matrix.
132
   */
133
  void calCoef(double *&b, cholmod_sparse *&A);
134
  /**
135
   * Returns the linear predictor for the outcome variable with index i.
136
   */
137
  double getMean(unsigned int i) const;
138
  /**
139
   * Returns the precision of the outcome variable with index i.
140
   * This may be the precision parameter of an auxiliary variable.
141
   */
142
  virtual double getPrecision(unsigned int i) const = 0;
143
  /**
144
   * Returns the value of the outcome variable with index i.
145
   * This may be an auxiliary variable, rather than the observed
146
   * outcome.
147
   */
148
  virtual double getValue(unsigned int i) const = 0;
149
  /**
150
   * Updates auxiliary variables.  The default method does
151
   * nothing.  Sampling methods that use auxiliary variables to
152
   * reduce the GLM to a linear model must provide their own
153
   * implementation.
154
   *
155
   * This function is called by updateLM.  Internally, updateLM
156
   * calculates the posterior mean "mu" by solving the equation
157
   * (A %*% mu = b) where "A" is the posterior precision. The
158
   * same dense vector is used to hold "b" (before solving the
159
   * equation) and "mu" (after solving the equation).  If
160
   * updateLM is called with parameter "chol" set to false then
161
   * updateAuxiliary is called before solving the equation: thus
162
   * the first argument (y) should contain the canonical
163
   * parameter (b).  If updateLM is called with "chol" set to
164
   * true then updateAuxiliary is called after solving the
165
   * equation: thus the first argument (y) should contain the
166
   * posterior mean.
167
   *
168
   * IMPORTANT NOTE: GLMMethod uses a parameterization in which
169
   * the current value of the parameters is considered the
170
   * origin. The value of "y" (mu or b) may need to be adjusted
171
   * for this centring by an implementation of updateAuxiliary.
172
   * 
173
   * @param y Dense vector which may be either the posterior
174
   * mean "mu" or (A %*% mu), where "A" is the posterior
175
   * precision.
176
   *
177
   * @param N Cholesky factorization of the posterior precision "A".
178
   *
179
   * @param rng Random number generator used for sampling.
180
   */
181
  virtual void updateAuxiliary(cholmod_dense *y, cholmod_factor *N, 
182
                   RNG *rng);
183
  /**
184
   * Returns false. Sampling methods inheriting from GLMMethod
185
   * are not adaptive.
186
   */
48
    bool isAdaptive() const;
187
    bool isAdaptive() const;
188
  /**
189
   * Does nothing, as GLMMethod is not adaptive.
190
   */
49
    void adaptOff();
191
    void adaptOff();
192
  /**
193
   * Returns true, as GLMMethod is not adaptive
194
   */
50
    bool checkAdaptation() const;
195
    bool checkAdaptation() const;
51
  void calCoef(double *&, cholmod_sparse *&);
196
  /**
52
  virtual double getMean(unsigned int i) const;
197
   * Utility function that classifies the distribution of a
53
  virtual std::string name() const = 0;
198
   * stochastic node into one of the classes defined by the
54
  virtual double getPrecision(unsigned int i) const = 0;
199
   * enumeration GLMFamily.
55
  virtual double getValue(unsigned int i) const = 0;
200
   */
56
  virtual void updateAuxiliary(cholmod_dense *b, cholmod_factor *N, RNG *rng);
57
    static GLMFamily getFamily(StochasticNode const *snode);
201
    static GLMFamily getFamily(StochasticNode const *snode);
58
    };
202
    };
59
203
60
}
204
}
61
#endif /* GLM_METHOD_H_ */
205
#endif /* GLM_METHOD_H_ */