#############################################################
### function remef (REMove EFfects)
# remove random factors variance and fixed effects from the dependent variable of an LMM analysis
# by Sven Hohenstein, Reinhold Kliegl, 2011, 2012, 2013
# v0.6.10, December 2013
## VERSION HISTORY:
# v0.6.10, December 2013:
# - an error occurred with glmerMod objects; fixed
# v0.6.9, July 2013:
# - function now works with glmerMod models created with glmer
# v0.6.8, June 2013:
# - function now uses the new lme4 function for 'lmerMod' objects
# (lme4Eigen is no longer supported)
# - the plot parameter is deprecated
# - minor code changes
# v0.6.7, February 2012:
# - the argument "all" (string) for the parameter 'ran'
# selects all random effects
# v0.6.6, February 2012:
# - the function now works with objects created by the library
# lme4Eigen too
# v0.6.5 February 2012:
# - former parameter 'link' is now called 'family'
# - family parameter is specified as object (not string),
# like, e.g., in lmer()
# - functions are called from the correct package
# - new parameter 'plot': if TRUE, a plot of the uncorrected
# and corrected dependent variable is displayed
# v0.6.2 August 2011
# - a bug occuring when NULL was part of the random effects list
# was removed
# v0.6, June 2011
# - a bug was removed
# v0.54, June 2011:
# - new parameter 'link' specifying the logit link function; either
# "identity" or "logit"
# - redundant entries are removed from the vectors in the list 'ran'
# v0.54, June 2011:
# - Fixed effects (in the vector 'fix') can be integers or strings
# (e.g., c(2:4, "Effect1", 6, "Effect2") )
# v0.52, May 2011
# - Correction: If grouping was TRUE and a specified fixed main effect of a
# factor was not present in any interaction, the function failed
# v0.51, May 2011:
# - parameter 'grouping' also works if keep = FALSE, if grouping is TRUE,
# the effect and all adssociated ones of higher order are chosen to be
# removed
# v0.5, May 2011:
# - new boolean parameter: 'grouping'; if both keep and grouping are TRUE,
# the effects in fix and all subeffects are chosen to be kept
# Note: In the present version, if keep is FALSE, grouping will be ignored
# v0.4a, May 2011:
# - if keep is TRUE, the to be kept effects are not removed from the
# dependent variable but are actually added to the residuals
# - more efficient processing and less operations (if keep is set properly)
# v0.4, April 2011:
# - new parameter 'keep' allows the remove the effects not specified
# - more efficient processing
# v0.32, April 2011:
# - Correction: If random effects of random factors with order numbers < 1
# were specified, the output was not correct.
# v0.3a, April 2011:
# changes to last version (v0.3):
# - more efficient processing
# v0.3, April 2011:
# changes to last version (v0.2):
# - now it is possible to remove all random effects (even random slopes)
# - the parameter 'ran' must be a list of vectors
# - switched order position of parameters 'fix' and 'ran'
# v0.2, February 2011:
# changes to last version (v0.1):
# - Correction: If just one fixed effect was specified, the function failed.
# Now it is possible to specify a single number as parameter 'fix'
## INPUT:
# model: an object of class 'mer' (lme4 packge), 'lmerMod' (lme4Eigen package)
# ran: list of vectors of natural numbers according to random effects of the model;
# the maximum length of this list is the number of random factors;
# each list member includes the numbers of the random effects (of the current random factor)
# (e.g., list(1:2, NULL, c(1, 3)) )
# default value: NULL (no random-factor related variance is removed)
# fix: vector of natural numbers or strings according to fixed effects of the model
# (e.g., c(2:4, 6, 8:10), c("Effect1", "Effect2"), or c(2:4, "Effect1", 6, "Effect2"))
# Vectors can consist of both integers and strings (R will transform all intergers to
# strings if at least one string is entered);
# redundant values will be ignored (one effect can't be removed more than once)
# default value: NULL (no fixed effects are removed)
# keep: logical value; if TRUE, the specified effects are not removed but kept
# (and all other effects are removed)
# grouping: logical value; if FALSE the specified effects in fix are used;
# if TRUE, effects are grouped:
# - if keep=TRUE, the specified effect (and all effects associated with the
# variables) as well as all effects of lower order consisting solitary of
# a subset of the variables of the specified effet are kept
# (e.g., if the interaction A:B:C is specified, the following effects are
# chosen: A:B:C, A:B, A:C, B:C, A, B, and C)
# - if keep=FALSE, the specified effect (and all effects associated with the
# variables) as well as all effects of higher order consisting of
# all variables of the specified effet (and other variables) are removed
# (e.g., if the interaction A:B:C is specified, the following effects are
# chosen: A:B:C and A:B:C:x)
# family: a family object including a link function;
# either "identity" (default) or "logit" link function; this must be the
# link function which was used for generating the model;
# - gaussian(link = "identity") is the right option for most purposes, e.g. Gaussian
# distributions; the dependent variable is not transformed
# - binomial(link = "logit") is used for binomial dependent variables; the output vector
# will contain probabilities which can be rounded to obtain data in the
# original metric (zeroes and ones); note that even if keep=FALSE, the
# base for the calculation are not the input values but the residuals
## OUTPUT: a numerical vector
## How does it work?
# Random factor variance and fixed effects are removed from the dependent variable of an LMM analysis.
# The order of effects in the fixed-effects input vector is the same as in the model output.
# The returned vector includes the dependent variable corrected by the specified effects.
# Note. The currect version of this function works for the "identity" and "logit" model link functions
# only.
remef <- function(model, fix = NULL, ran = NULL, keep = FALSE, grouping = FALSE, family = gaussian(link = "identity"), plot = FALSE) {
# name of link function
mclass <- class(model)
sup_classes <- c("mer", "lmerMod", "glmerMod") # supported object classes
if (!mclass %in% c(sup_classes)) stop("This class is not supported yet.")
fixef.fnc <- lme4::fixef
ranef.fnc <- lme4::ranef
moma <- model.matrix(model)
# model matrix
if (is.function(family)) family <- family()
link <- family$link
if (!(link %in% c("identity", "logit"))) stop("Unknown link function specified.")
if (keep || link == "logit") {
#DV <- lme4::residuals(model) # use residuals as base and add effects
DV <- residuals(model)
} else {
# use actual data as base and remove effects (this is not possible with the logit link function)
DV <- model@frame[ , 1]
}
fix <- unique(fix) # remove redundant values
if (any(is.character(fix))) { # strings were entered as fixed effects
suppressWarnings( fix.num <- as.numeric(fix) )
# fix.num, numerical fix vector
ef.names.idx <- is.na(fix.num)
# logical vector indicating the positions of effect names in the fix vector
for (ef in fix[ef.names.idx]) {
if(!any(ef == names(fixef.fnc(model)))) stop(paste("The fixed effect", ef, "is not present in the model."))
# stops the function if any fixed effect string is not present in the model
}
fix.num[ef.names.idx] <- which( names(fixef.fnc(model)) %in% fix[ef.names.idx] )
fix <- unique(fix.num)
}
if (grouping & length(fix) > 0) {
new.fix <- NULL # help variable for the construction of a new 'fix' vector
for (ef in fix) { # choose the specified effects and all lower-order/higher-order effects of the relevant variables
# var.str <- attr(lme4::model.matrix(model), "assign")
var.str <- attr(moma, "assign")
# var.str, structure of the variables
if (!var.str[ef]) { # the specified effect is the intercept
new.fix <- unique(c(new.fix, ef))
} else { # the specified effect is NOT the intercept
var.mat <- attr(terms(model), "factors")
# var.mat, variable matrix
ef.order <- attr(terms(model), "order")
# ef.order, order of effects (0: intercept, 1: main effect, 2: two-factor interaction, etc.)
if (keep) { # choose specified effect an all associated ones of lower order
ass.ef <- which( var.str %in% which( colSums( matrix( var.mat[ var.mat[ , var.str[ef] ] == 1, ] , ncol = length(ef.order)) ) == ef.order ) )
# ass.ef, effects associated with the input effect (integer(s))
} else { # choose specified effect an all associated ones of higher order
ass.ef <- which( var.str %in% which( colSums( matrix( var.mat[ var.mat[ , var.str[ef] ] == 1, ] , ncol = length(ef.order)) ) == ef.order[var.str[ef]] ) )
}
new.fix <- unique(c(new.fix, ass.ef))
}
}
fix <- new.fix
}
if (!keep && link == "logit") { # keep effects!
fix <- setdiff(seq_along(fixef.fnc(model)), fix)
}
# remove random factor variance
if (identical(ran, "all")) {
ran <- lapply(ranef(model), seq_along)
}
if (length(ran) > 0) {
rf_before.end.at <- -1
# for (rf in 1 : length(ran)) {
for (rf in seq_along(ranef.fnc(model))) { # rf, random factor
if (rf > length(ran)) { # non-specified random factors
if (!keep && link == "logit") { # keep effects!
ran[[rf]] <- seq.int(ncol(ranef.fnc(model)[[rf]]))
} else {
ran[[rf]] <- NULL
}
} else {
if(!is.null(ran[[rf]])) ran[[rf]] <- unique(ran[[rf]])
if (!keep && link == "logit") { # keep effects!
ran[[rf]] <- setdiff(seq.int(ncol(ranef.fnc(model)[[rf]])), ran[[rf]])
}
}
n.rf.levels <- nrow(ranef.fnc(model)[[rf]])
# n.rf.levels, number of random-factor levels
for (re in ran[[rf]]) { # re, random effect (e.g., intercept, slope)
idx.re <- ( ((re - 1) * n.rf.levels + 1) : (re * n.rf.levels) ) + (rf_before.end.at + 1)
#idx.re, numbers of lines of the (transpose) random effect matrix for the currect random effect
if (mclass %in% c("lmerMod", "glmerMod")) re.matrix <- model@pp$Zt else re.matrix <- model@Zt
DV <- DV + sign((keep || link == "logit") - 0.5) * ( as.vector( ranef.fnc(model)[[rf]][ , re] %*% re.matrix[idx.re, ] ) )
# Note. re.matrix is the transpose sparse random-effect matrix (class dgCMatrix)
# Note. sign(keep - 0.5) is +1 if keep==TRUE and -1 otherwise
}
rf_before.end.at <- rf_before.end.at + prod(dim(ranef.fnc(model)[[rf]]))
# at which line of the random-effect matrix did the current random effect end?
}
}
# remove fixed effects
DV <- DV + sign((keep || link == "logit") - 0.5) * ( matrix(moma[ , fix], nrow = length(DV)) %*% fixef.fnc(model)[fix] )
if (link == "logit") DV <- ( 1 / (1 + exp(- DV)) )
# if DV is rounded, zeros and ones will result
if (plot) {
warning("The 'plot' parameter is deprecated.")
}
return(as.vector(DV))
}
#############################################################