Source code for pymer4.models.lmer

  1from .base import enable_logging, requires_fit, model
  2from ..tidystats.broom import tidy
  3from ..tidystats.lmerTest import ranef, lmer as lmer_, bootMer
  4from ..tidystats.multimodel import coef
  5from ..tidystats.easystats import get_param_names, is_converged
  6from ..tidystats.tables import summary_lmm_table
  7from ..expressions import logit2odds
  8from polars import DataFrame, col
  9import polars.selectors as cs
 10from rpy2.robjects import NULL, NA_Real
 11
 12

[docs]
 13class lmer(model):
 14    """Linear mixed effects model estimated via ML/REML. Inherits from ``lm``.
 15
 16    This class implements linear mixed effects models using Maximum Likelihood or
 17    Restricted Maximum Likelihood estimation. It extends the base linear model class
 18    to handle random effects and nested data structures.
 19
 20    Args:
 21        formula (str): R-style formula specifying the model, including random effects
 22        data (DataFrame): Input data for the model
 23    """
 24
 25    def __init__(self, formula, data, **kwargs):
 26        """Initialize the linear mixed effects model.
 27
 28        Args:
 29            formula (str): R-style formula specifying the model, including random effects
 30            data (DataFrame): Input data for the model
 31        """
 32        super().__init__(formula, data, **kwargs)
 33        self._r_func = lmer_
 34        self._summary_func = summary_lmm_table
 35        self.fixef = None
 36        self.ranef = None
 37        self.ranef_var = None
 38        self.convergence_status = None
 39        self._REML = self._init_kwargs.get("REML", True)
 40
 41    def _handle_rfx(self, **kwargs):
 42        """Sets `.ranef_var` using ``broom.mixed::tidy()`` and ``lme4::ranef()`` and ``lme4::coef()`` to get random effects and BLUPs. Manually exponentiates random effects if ``exponentiate=True`` since ``broom.mixed::tidy()`` does not do this."""
 43
 44        self.ranef_var = tidy(
 45            self.r_model, effects="ran_pars", conf_int=True, **kwargs
 46        ).drop("effect", strict=False)
 47        self.ranef = ranef(self.r_model)
 48        self.fixef = coef(self.r_model)
 49
 50        # Ensure multiple rfx are returned as a dict
 51        if isinstance(self.fixef, list):
 52            fixed_names, random_names = get_param_names(self.r_model)
 53            self.fixef = dict(zip(random_names.keys(), self.fixef))
 54            self.ranef = dict(zip(random_names.keys(), self.ranef))
 55
 56        # Exponentiate params if requested
 57        exponentiate = kwargs.get("exponentiate", False)
 58        if exponentiate:
 59            if isinstance(self.fixef, dict):
 60                self.fixef = {
 61                    k: v.with_columns(col("level"), logit2odds(cs.exclude("level")))
 62                    for k, v in self.fixef.items()
 63                }
 64            else:
 65                self.fixef = self.fixef.with_columns(
 66                    col("level"), logit2odds(cs.exclude("level"))
 67                )
 68            if isinstance(self.ranef, dict):
 69                self.ranef = {
 70                    k: v.with_columns(col("level"), logit2odds(cs.exclude("level")))
 71                    for k, v in self.ranef.items()
 72                }
 73            else:
 74                self.ranef = self.ranef.with_columns(
 75                    col("level"), logit2odds(cs.exclude("level"))
 76                )
 77
 78    def _bootstrap(
 79        self,
 80        nboot,
 81        save_boots,
 82        conf_method="perc",
 83        parallel="multicore",
 84        ncpus=4,
 85        conf_level=0.95,
 86        **kwargs,
 87    ):
 88        """Get bootstrapped estimates of model parameters using `lme4's confint.merMod <https://www.rdocumentation.org/packages/lme4/versions/1.1-36/topics/confint.merMod>`_. Unlike with `lm()`, we don't use `easystats` functions because they don't return the full bootstrap distribution for rfx, only ffx. We use `tidy` to summarize the bootstrap distributions and can therefore can use all the `conf_method` that it supports (e.g. `"perc"`, `"bca"`, `"norm"`, `"basic"`).
 89
 90        Args:
 91            conf_method (str, optional): Type of bootstrap confidence intervals. Defaults to "perc"
 92            nboot (int, optional): Number of bootstrap samples. Defaults to 1000
 93            parallel (str, optional): Parallelization method. Defaults to "multicore"
 94            ncpus (int, optional): Number of CPUs to use. Defaults to 4
 95            conf_level (float, optional): Confidence level for intervals. Defaults to 0.95
 96            save_boots (bool, optional): Whether to save bootstrap samples. Defaults to True
 97            **kwargs: Additional arguments passed to confint
 98
 99        Returns:
100            tuple: (fix_cis, rfx_cis) - Fixed effects CIs and random effects CIs as polars DataFrames
101        """
102        cis, boots = bootMer(
103            self.r_model,
104            nsim=nboot,
105            conf_level=conf_level,
106            conf_method=conf_method,
107            parallel=parallel,
108            ncpus=ncpus,
109            save_boots=save_boots,
110            **kwargs,
111        )
112        self.cis = cis
113
114        # Fixed CIs
115        fixed_names = self.params["term"].to_list()
116        fixed_lower = (
117            cis.filter(col("term").is_in(fixed_names)).select("conf_low").to_series()
118        )
119        fixed_upper = (
120            cis.filter(col("term").is_in(fixed_names)).select("conf_high").to_series()
121        )
122        self.result_fit = self.result_fit.with_columns(
123            conf_low=fixed_lower,
124            conf_high=fixed_upper,
125        )
126
127        # Drop fixed-effect rows and split out term col to term and group cols
128        ranef_cis = (
129            cis.filter(~col("term").is_in(fixed_names))
130            .with_columns(
131                col("term")
132                .str.split_exact("___", 2)
133                .explode()
134                .struct.rename_fields(["term", "group"])
135                .struct.unnest()
136            )
137            .select("group", "term", "conf_low", "conf_high")
138        )
139        self.ranef_var = self.ranef_var.drop("conf_low", "conf_high").join(
140            ranef_cis, on=["term", "group"]
141        )
142
143        if save_boots:
144            self.result_boots = boots
145
146    @enable_logging
147    def fit(
148        self,
149        summary=False,
150        conf_method="satterthwaite",
151        nboot=1000,
152        save_boots=True,
153        parallel="multicore",
154        ncpus=4,
155        conf_type="perc",
156        bootMer_kwargs={},
157        **kwargs,
158    ):
159        """Fit a linear mixed effects model using ``lmer()`` in R with Satterthwaite degrees of freedom and p-values calculated using ``lmerTest``.
160
161        Args:
162            summary (bool, optional): Whether to return the model summary. Defaults to False
163            conf_method (str, optional): Method for confidence interval calculation. Defaults to ``"satterthwaite"``. Alternatively, ``"boot"`` for bootstrap CIs.
164            nboot (int, optional): Number of bootstrap samples. Defaults to 1000
165            parallel (str, optional): Parallelization for bootstrapping. Defaults to "multicore"
166            ncpus (int, optional): Number of cores to use for parallelization. Defaults to 4
167            conf_type (str, optional): Type of confidence interval to calculate. Defaults to "perc"
168
169        Returns:
170            GT, optional: Model summary if ``summary=True``
171        """
172
173        # Use super to get fixed effects via easystats::model_parameters()
174        if conf_method == "boot":
175            if self.family is None:
176                default_conf_method = "satterthwaite"
177            else:
178                default_conf_method = "wald"
179        super().fit(
180            conf_method=conf_method if conf_method != "boot" else default_conf_method,
181            effects="fixed",
182            ci_random=False,
183            parallel=parallel,
184            ncpus=ncpus,
185            conf_type=conf_type,
186            **kwargs,
187        )
188
189        # Store the conf_method in the fit_kwargs since we overwrite it in the super call
190        self._fit_kwargs["conf_method"] = conf_method
191
192        # Get random effects
193        self._handle_rfx(**kwargs)
194
195        if conf_method == "boot":
196            self._bootstrap(
197                nboot=nboot,
198                save_boots=save_boots,
199                conf_method=conf_type,
200                parallel=parallel,
201                ncpus=ncpus,
202                **bootMer_kwargs,
203            )
204
205        # Handle convergence & singularity warnings
206        did_converge, message = is_converged(self.r_model)
207        self.convergence_status = message
208        if not did_converge:
209            self.r_console.append(message)
210
211        if summary:
212            return self.summary()
213
214    @enable_logging
215    def anova(
216        self,
217        summary=False,
218        auto_ss_3=True,
219        jointtest_kwargs={"mode": "satterthwaite", "lmer_df": "satterthwaite"},
220        anova_kwargs={},
221    ):
222        """Calculate a Type-III ANOVA table for the model using ``joint_tests()`` in R.
223
224        Args:
225            summary (bool): whether to return the ANOVA summary. Defaults to False
226            auto_ss_3 (bool): whether to automatically use balanced contrasts when calculating the result via `joint_tests()`. When False, will use the contrasts specified with `set_contrasts()` which defaults to `"contr.treatment"` and R's `anova()` function; Default is True.
227            jointtest_kwargs (dict): additional arguments to pass to `joint_tests()` Defaults to using Satterthwaite degrees of freedom
228            anova_kwargs (dict): additional arguments to pass to `anova()`
229        """
230        super().anova(
231            auto_ss_3=auto_ss_3,
232            jointtest_kwargs=jointtest_kwargs,
233            anova_kwargs=anova_kwargs,
234        )
235        if summary:
236            return self.summary_anova()
237
238    @enable_logging
239    @requires_fit
240    def emmeans(self, marginal_var, by=None, p_adjust="sidak", **kwargs):
241        """Compute marginal means and/or contrasts between factor levels. ``marginal_var`` is the predictor whose levels will have means or contrasts computed. ``by`` is an optional predictor to marginalize over. If ``contrasts`` is not specified, only marginal means are returned
242
243        Args:
244            marginal_var (str): name of predictor to compute means or contrasts for
245            by (str/list): additional predictors to marginalize over
246            contrasts (str | 'pairwise' | 'poly' | dict | None, optional): how to specify comparison within `marginal_var`. Defaults to None.
247            p_adjust (str): multiple comparisons adjustment method. One of: none, tukey (default), bonf, sidak, fdr, holm, dunnet, mvt (monte-carlo multi-variate T, aka exact tukey/dunnet).
248
249        Returns:
250            DataFrame: Table of marginal means or contrasts
251        """
252
253        return super().emmeans(
254            marginal_var,
255            by,
256            mode="satterthwaite",
257            lmer_df="satterthwaite",
258            lmerTest_limit=999999,
259            p_adjust=p_adjust,
260            **kwargs,
261        )
262
263    def predict(self, data: DataFrame, use_rfx=True, **kwargs):
264        """Make predictions using new data.
265
266        Args:
267            data (DataFrame): Input data for predictions
268            use_rfx (bool, optional): Whether to include random effects in predictions. Defaults to True. Equivalent to ``re.form = NULL`` in R if True, ``re.form = NA`` if False
269            **kwargs: Additional arguments passed to predict function
270
271        Returns:
272            ndarray: Predicted values
273        """
274        re_form = NULL if use_rfx else NA_Real
275        return super().predict(data, re_form=re_form, **kwargs)
276
277    @requires_fit
278    def simulate(self, nsim: int = 1, use_rfx=True, **kwargs):
279        """Simulate values from the fitted model.
280
281        Args:
282            nsim (int, optional): Number of simulations to run. Defaults to 1
283            use_rfx (bool, optional): Whether to include random effects in simulations. Defaults to True.
284                Equivalent to ``re.form = NULL`` in R if True, ``re.form = NA`` if False
285            **kwargs: Additional arguments passed to simulate function
286
287        Returns:
288            DataFrame: Simulated values with the same number of rows as the original data
289                and columns equal to nsim
290        """
291        re_form = NULL if use_rfx else NA_Real
292        return super().simulate(nsim, re_form=re_form, **kwargs)