############################################################# ### function remef (REMove EFfects) # remove random factors variance and fixed effects from the dependent variable of an LMM analysis # by Sven Hohenstein, Reinhold Kliegl, 2011, 2012, 2013 # v0.6.10, December 2013 ## VERSION HISTORY: # v0.6.10, December 2013: # - an error occurred with glmerMod objects; fixed # v0.6.9, July 2013: # - function now works with glmerMod models created with glmer # v0.6.8, June 2013: # - function now uses the new lme4 function for 'lmerMod' objects # (lme4Eigen is no longer supported) # - the plot parameter is deprecated # - minor code changes # v0.6.7, February 2012: # - the argument "all" (string) for the parameter 'ran' # selects all random effects # v0.6.6, February 2012: # - the function now works with objects created by the library # lme4Eigen too # v0.6.5 February 2012: # - former parameter 'link' is now called 'family' # - family parameter is specified as object (not string), # like, e.g., in lmer() # - functions are called from the correct package # - new parameter 'plot': if TRUE, a plot of the uncorrected # and corrected dependent variable is displayed # v0.6.2 August 2011 # - a bug occuring when NULL was part of the random effects list # was removed # v0.6, June 2011 # - a bug was removed # v0.54, June 2011: # - new parameter 'link' specifying the logit link function; either # "identity" or "logit" # - redundant entries are removed from the vectors in the list 'ran' # v0.54, June 2011: # - Fixed effects (in the vector 'fix') can be integers or strings # (e.g., c(2:4, "Effect1", 6, "Effect2") ) # v0.52, May 2011 # - Correction: If grouping was TRUE and a specified fixed main effect of a # factor was not present in any interaction, the function failed # v0.51, May 2011: # - parameter 'grouping' also works if keep = FALSE, if grouping is TRUE, # the effect and all adssociated ones of higher order are chosen to be # removed # v0.5, May 2011: # - new boolean parameter: 'grouping'; if both keep and grouping are TRUE, # the effects in fix and all subeffects are chosen to be kept # Note: In the present version, if keep is FALSE, grouping will be ignored # v0.4a, May 2011: # - if keep is TRUE, the to be kept effects are not removed from the # dependent variable but are actually added to the residuals # - more efficient processing and less operations (if keep is set properly) # v0.4, April 2011: # - new parameter 'keep' allows the remove the effects not specified # - more efficient processing # v0.32, April 2011: # - Correction: If random effects of random factors with order numbers < 1 # were specified, the output was not correct. # v0.3a, April 2011: # changes to last version (v0.3): # - more efficient processing # v0.3, April 2011: # changes to last version (v0.2): # - now it is possible to remove all random effects (even random slopes) # - the parameter 'ran' must be a list of vectors # - switched order position of parameters 'fix' and 'ran' # v0.2, February 2011: # changes to last version (v0.1): # - Correction: If just one fixed effect was specified, the function failed. # Now it is possible to specify a single number as parameter 'fix' ## INPUT: # model: an object of class 'mer' (lme4 packge), 'lmerMod' (lme4Eigen package) # ran: list of vectors of natural numbers according to random effects of the model; # the maximum length of this list is the number of random factors; # each list member includes the numbers of the random effects (of the current random factor) # (e.g., list(1:2, NULL, c(1, 3)) ) # default value: NULL (no random-factor related variance is removed) # fix: vector of natural numbers or strings according to fixed effects of the model # (e.g., c(2:4, 6, 8:10), c("Effect1", "Effect2"), or c(2:4, "Effect1", 6, "Effect2")) # Vectors can consist of both integers and strings (R will transform all intergers to # strings if at least one string is entered); # redundant values will be ignored (one effect can't be removed more than once) # default value: NULL (no fixed effects are removed) # keep: logical value; if TRUE, the specified effects are not removed but kept # (and all other effects are removed) # grouping: logical value; if FALSE the specified effects in fix are used; # if TRUE, effects are grouped: # - if keep=TRUE, the specified effect (and all effects associated with the # variables) as well as all effects of lower order consisting solitary of # a subset of the variables of the specified effet are kept # (e.g., if the interaction A:B:C is specified, the following effects are # chosen: A:B:C, A:B, A:C, B:C, A, B, and C) # - if keep=FALSE, the specified effect (and all effects associated with the # variables) as well as all effects of higher order consisting of # all variables of the specified effet (and other variables) are removed # (e.g., if the interaction A:B:C is specified, the following effects are # chosen: A:B:C and A:B:C:x) # family: a family object including a link function; # either "identity" (default) or "logit" link function; this must be the # link function which was used for generating the model; # - gaussian(link = "identity") is the right option for most purposes, e.g. Gaussian # distributions; the dependent variable is not transformed # - binomial(link = "logit") is used for binomial dependent variables; the output vector # will contain probabilities which can be rounded to obtain data in the # original metric (zeroes and ones); note that even if keep=FALSE, the # base for the calculation are not the input values but the residuals ## OUTPUT: a numerical vector ## How does it work? # Random factor variance and fixed effects are removed from the dependent variable of an LMM analysis. # The order of effects in the fixed-effects input vector is the same as in the model output. # The returned vector includes the dependent variable corrected by the specified effects. # Note. The currect version of this function works for the "identity" and "logit" model link functions # only. remef <- function(model, fix = NULL, ran = NULL, keep = FALSE, grouping = FALSE, family = gaussian(link = "identity"), plot = FALSE) { # name of link function mclass <- class(model) sup_classes <- c("mer", "lmerMod", "glmerMod") # supported object classes if (!mclass %in% c(sup_classes)) stop("This class is not supported yet.") fixef.fnc <- lme4::fixef ranef.fnc <- lme4::ranef moma <- model.matrix(model) # model matrix if (is.function(family)) family <- family() link <- family$link if (!(link %in% c("identity", "logit"))) stop("Unknown link function specified.") if (keep || link == "logit") { #DV <- lme4::residuals(model) # use residuals as base and add effects DV <- residuals(model) } else { # use actual data as base and remove effects (this is not possible with the logit link function) DV <- model@frame[ , 1] } fix <- unique(fix) # remove redundant values if (any(is.character(fix))) { # strings were entered as fixed effects suppressWarnings( fix.num <- as.numeric(fix) ) # fix.num, numerical fix vector ef.names.idx <- is.na(fix.num) # logical vector indicating the positions of effect names in the fix vector for (ef in fix[ef.names.idx]) { if(!any(ef == names(fixef.fnc(model)))) stop(paste("The fixed effect", ef, "is not present in the model.")) # stops the function if any fixed effect string is not present in the model } fix.num[ef.names.idx] <- which( names(fixef.fnc(model)) %in% fix[ef.names.idx] ) fix <- unique(fix.num) } if (grouping & length(fix) > 0) { new.fix <- NULL # help variable for the construction of a new 'fix' vector for (ef in fix) { # choose the specified effects and all lower-order/higher-order effects of the relevant variables # var.str <- attr(lme4::model.matrix(model), "assign") var.str <- attr(moma, "assign") # var.str, structure of the variables if (!var.str[ef]) { # the specified effect is the intercept new.fix <- unique(c(new.fix, ef)) } else { # the specified effect is NOT the intercept var.mat <- attr(terms(model), "factors") # var.mat, variable matrix ef.order <- attr(terms(model), "order") # ef.order, order of effects (0: intercept, 1: main effect, 2: two-factor interaction, etc.) if (keep) { # choose specified effect an all associated ones of lower order ass.ef <- which( var.str %in% which( colSums( matrix( var.mat[ var.mat[ , var.str[ef] ] == 1, ] , ncol = length(ef.order)) ) == ef.order ) ) # ass.ef, effects associated with the input effect (integer(s)) } else { # choose specified effect an all associated ones of higher order ass.ef <- which( var.str %in% which( colSums( matrix( var.mat[ var.mat[ , var.str[ef] ] == 1, ] , ncol = length(ef.order)) ) == ef.order[var.str[ef]] ) ) } new.fix <- unique(c(new.fix, ass.ef)) } } fix <- new.fix } if (!keep && link == "logit") { # keep effects! fix <- setdiff(seq_along(fixef.fnc(model)), fix) } # remove random factor variance if (identical(ran, "all")) { ran <- lapply(ranef(model), seq_along) } if (length(ran) > 0) { rf_before.end.at <- -1 # for (rf in 1 : length(ran)) { for (rf in seq_along(ranef.fnc(model))) { # rf, random factor if (rf > length(ran)) { # non-specified random factors if (!keep && link == "logit") { # keep effects! ran[[rf]] <- seq.int(ncol(ranef.fnc(model)[[rf]])) } else { ran[[rf]] <- NULL } } else { if(!is.null(ran[[rf]])) ran[[rf]] <- unique(ran[[rf]]) if (!keep && link == "logit") { # keep effects! ran[[rf]] <- setdiff(seq.int(ncol(ranef.fnc(model)[[rf]])), ran[[rf]]) } } n.rf.levels <- nrow(ranef.fnc(model)[[rf]]) # n.rf.levels, number of random-factor levels for (re in ran[[rf]]) { # re, random effect (e.g., intercept, slope) idx.re <- ( ((re - 1) * n.rf.levels + 1) : (re * n.rf.levels) ) + (rf_before.end.at + 1) #idx.re, numbers of lines of the (transpose) random effect matrix for the currect random effect if (mclass %in% c("lmerMod", "glmerMod")) re.matrix <- model@pp$Zt else re.matrix <- model@Zt DV <- DV + sign((keep || link == "logit") - 0.5) * ( as.vector( ranef.fnc(model)[[rf]][ , re] %*% re.matrix[idx.re, ] ) ) # Note. re.matrix is the transpose sparse random-effect matrix (class dgCMatrix) # Note. sign(keep - 0.5) is +1 if keep==TRUE and -1 otherwise } rf_before.end.at <- rf_before.end.at + prod(dim(ranef.fnc(model)[[rf]])) # at which line of the random-effect matrix did the current random effect end? } } # remove fixed effects DV <- DV + sign((keep || link == "logit") - 0.5) * ( matrix(moma[ , fix], nrow = length(DV)) %*% fixef.fnc(model)[fix] ) if (link == "logit") DV <- ( 1 / (1 + exp(- DV)) ) # if DV is rounded, zeros and ones will result if (plot) { warning("The 'plot' parameter is deprecated.") } return(as.vector(DV)) } #############################################################