##### Potsdam Mind Research Repository #####

## This script is part of the publication package for the article:
# Kliegl, R., Hohenstein, S., Yan, M., & McDonald, S. (in press).
# How preview space/time translates into preview cost/benefit for fixation durations during reading.
# Quarterly Journal of Experimental Psychology. 

## The data is from:
# McDonald, S. Parafoveal preview benefit in reading is only obtained from
# the saccade goal. Vision Research (2006) vol. 46 (26) pp. 4416-24
# http://dx.doi.org/10.1016/j.visres.2006.08.027

# History:
# Reanalyses: 14-11-2010, r kliegl
# Modified:   05/06-02-2011, r kliegl 
# Modified:  Bug fix related to fixed effects removal 
# 09-02-2011, s hohenstein / r kliegl
# Updated: 23-04-2012, Sven Hohenstein
# Updated: 19-06-2013, Sven Hohenstein

rm(list =ls())

library(lme4)
library(ggplot2)
source("remef.v0.6.7.R")

# Load the trial-level data and derive the predictors used by all models below.
data <- read.table("data.Mid_post_bound_first-pass_16SUs.V0.dat", header=TRUE)
# 12 pixels per letter - 96 in total for 8-letter (including spc before) target words.
data$insa <- data$insa/12   # incoming saccade amplitude in letters
data$lp <- data$lp/12       # landing position in letters

# Center the covariates (no rescaling). scale() returns a one-column matrix;
# as.numeric() stores a plain numeric vector in the data frame instead.
data$cfrq <- as.numeric(scale(data$lnfreq, center=TRUE, scale=FALSE))
data$cinsa <- as.numeric(scale(data$insa, center=TRUE, scale=FALSE))

# Make preview and boundary proper factors before relabelling them.
# Since R 4.0.0 read.table() no longer converts strings to factors, and
# `levels<-` on a non-factor only attaches an attribute, so the later
# `contrasts<-` calls would fail. factor() leaves an existing factor unchanged.
data$prev <- factor(data$prev)
data$bound <- factor(data$bound)
stopifnot(nlevels(data$prev) == 2, nlevels(data$bound) == 2)

# NOTE(review): labels are assigned in the sorted order of the raw codes;
# confirm that the first code is "correct" / "mid-word" in the data file.
levels(data$prev) <- c("correct", "incorrect")
levels(data$bound) <- c("mid-word", "post-word")

# Number of observations per boundary condition
table(data$bound)

#################### TABLE 3 w/ LMMs of McDonald (2006)############
# Contrast coding for the two experimental factors
# ... sum contrasts for preview and boundary
contrasts(data$prev) <- contr.sum(2)
contrasts(data$bound) <- contr.sum(2)

# ... preview nested within boundary: one 4-level condition factor
#     (levels in sorted order: mid/correct, mid/incorrect, post/correct, post/incorrect)
data$cond <- factor(paste(data$bound, data$prev, sep="_"),
                    labels=c("M-C", "M-I", "P-C", "P-I"))

# One column per contrast, one row per condition
cmat <- cbind(
  ".Bound"         = c(-1/2, -1/2, +1/2, +1/2),  # Main effect Bound
  ".Prev|Bound==M" = c(-1,   +1,    0,    0),    # Nested effect 1: Prev | Bound == mid-word
  ".Prev|Bound==P" = c( 0,    0,   -1,   +1)     # Nested effect 2: Prev | Bound == post-word
)
rownames(cmat) <- c("M-C", "M-I", "P-C", "P-I")

# Assign and display the contrast matrix
(contrasts(data$cond) <- cmat)

##### Gaze #####
# All models: log gaze duration with crossed random intercepts for subject and word.

# ... full model (ML fit for the likelihood-ratio test)
m0.lGD.ML <- lmer(
  log(gaze) ~ qlp*prev*bound + cfrq*prev*bound + cinsa*prev*bound + (1 | subj) + (1 | word),
  data = data, REML = FALSE
)

# ... reduced model: qlp and cfrq enter as main effects only
m1.lGD.ML <- lmer(
  log(gaze) ~ qlp + cfrq + cinsa*prev*bound + (1 | subj) + (1 | word),
  data = data, REML = FALSE
)
anova(m0.lGD.ML, m1.lGD.ML)

# ... REML estimates
# ... ... with sum contrasts
m1.lGD <- lmer(
  log(gaze) ~ qlp + cfrq + cinsa*prev*bound + (1 | subj) + (1 | word),
  data = data, REML = TRUE
)
print(m1.lGD, cor = FALSE)

# ... ... with the preview-nested-under-boundary contrast
m1.lGD.2 <- lmer(
  log(gaze) ~ qlp + cfrq + cinsa*cond + (1 | subj) + (1 | word),
  data = data, REML = TRUE
)
print(m1.lGD.2, cor = FALSE)

# ... ... check the two slopes in post-word condition (NOT OPTIMAL; TOO MUCH LOSS OF POWER)
# ... ... ... post-word boundary, correct preview
ixPC <- which(data$bound == "post-word" & data$prev == "correct")
m1.lGD.3.PC <- lmer(
  log(gaze) ~ qlp + cfrq + cinsa + (1 | subj) + (1 | word),
  data = data[ixPC, ], REML = TRUE
)
print(m1.lGD.3.PC, cor = FALSE)

# ... ... ... post-word boundary, incorrect preview
ixPI <- which(data$bound == "post-word" & data$prev == "incorrect")
m1.lGD.3.PI <- lmer(
  log(gaze) ~ qlp + cfrq + cinsa + (1 | subj) + (1 | word),
  data = data[ixPI, ], REML = TRUE
)
print(m1.lGD.3.PI, cor = FALSE)

##### FFD ######
# All models: log first-fixation duration with crossed random intercepts for subject and word.

# ... full model (ML fit for the likelihood-ratio test)
m0.lFFD.ML <- lmer(
  log(ffd) ~ qlp*prev*bound + cfrq*prev*bound + cinsa*prev*bound + (1 | subj) + (1 | word),
  data = data, REML = FALSE
)

# ... reduced model: no 3-factor interactions and no 2-factor interaction with cfrq or qlp
m1.lFFD.ML <- lmer(
  log(ffd) ~ prev*bound + cfrq + qlp  + cinsa*(prev+bound) + (1 | subj) + (1 | word),
  data = data, REML = FALSE
)
anova(m0.lFFD.ML, m1.lFFD.ML)

# ... REML estimates
# ... ... with sum contrasts
m1.lFFD <- lmer(
  log(ffd) ~ prev*bound + cfrq + qlp  + cinsa*(prev+bound) + (1 | subj) + (1 | word),
  data = data, REML = TRUE
)
print(m1.lFFD, cor = FALSE)

# ... ... with the preview-nested-under-boundary contrast
m1.lFFD.2 <- lmer(
  log(ffd) ~ qlp + cfrq + cinsa*cond + (1 | subj) + (1 | word),
  data = data, REML = TRUE
)
print(m1.lFFD.2, cor = FALSE)

# ... ... preview and amplitude nested under boundary with incorrect preview as reference (NOT OPTIMAL)
# Dummy code: correct preview = 1, incorrect preview = 0; one model per boundary condition.
data$prev.trt <- ifelse(data$prev == "correct", 1, 0)
m1.lFFD.3.mid <- lmer(
  log(ffd) ~ qlp + cfrq + cinsa*prev.trt + (1 | subj) + (1 | word),
  data, subset = bound == "mid-word", REML = TRUE
)
print(m1.lFFD.3.mid, cor = FALSE)

m1.lFFD.3.post <- lmer(
  log(ffd) ~ qlp + cfrq + cinsa*prev.trt + (1 | subj) + (1 | word),
  data, subset = bound == "post-word", REML = TRUE
)
print(m1.lFFD.3.post, cor = FALSE)

# ... REML fit of the fixed-effects structure used for gaze duration ("GD" model)
m1.lFFD.x <- lmer(
  log(ffd) ~ qlp + cfrq + cinsa*prev*bound + (1 | subj) + (1 | word),
  data = data, REML = TRUE
)
print(m1.lFFD.x, cor = FALSE)

# ... ... check the two slopes in post-word condition (NOT OPTIMAL; TOO MUCH LOSS OF POWER)
# ... ... ... post-word boundary, correct preview
ixPC <- which(data$bound == "post-word" & data$prev == "correct")
m1.lFFD.3.PC <- lmer(
  log(ffd) ~ qlp + cfrq + cinsa + (1 | subj) + (1 | word),
  data = data[ixPC, ], REML = TRUE
)
print(m1.lFFD.3.PC, cor = FALSE)

# ... ... ... post-word boundary, incorrect preview
ixPI <- which(data$bound == "post-word" & data$prev == "incorrect")
m1.lFFD.3.PI <- lmer(
  log(ffd) ~ qlp + cfrq + cinsa + (1 | subj) + (1 | word),
  data = data[ixPI, ], REML = TRUE
)
print(m1.lFFD.3.PI, cor = FALSE)

##### SFD #######
# Single-fixation cases: trials with exactly one fixation (nfix == 1).
# For these trials the first-fixation duration (ffd) is the dependent variable.
data.sfd <- subset(data, nfix==1)
table(data.sfd$bound)

# ... full model (ML fit for the likelihood-ratio test)
m0.lSFD.ML  <- lmer(log(ffd) ~ qlp*prev*bound + cfrq*prev*bound + cinsa*prev*bound + (1 | subj) + (1 | word), data.sfd, REML=FALSE )

# ... reduced model: qlp and cfrq enter as main effects only
m1.lSFD.ML  <- lmer(log(ffd) ~ qlp + cfrq + cinsa*prev*bound + (1 | subj) + (1 | word), data.sfd, REML=FALSE )
anova(m0.lSFD.ML, m1.lSFD.ML)

# ... REML estimates
# ... ... for sum contrast
print(m1.lSFD  <- lmer(log(ffd) ~ qlp  + cfrq + cinsa*prev*bound + (1 | subj) + (1 | word), data.sfd, REML=TRUE), cor=FALSE)

# ... ... or preview-nested-under-boundary contrast
# Uses the 4-level `cond` factor (with its nested contrast matrix), as in the
# corresponding GD and FFD models; with cinsa*prev*bound this fit would merely
# duplicate m1.lSFD above.
print(m1.lSFD.2  <- lmer(log(ffd) ~ qlp + cfrq + cinsa*cond + (1 | subj) + (1 | word), data.sfd, REML=TRUE ), cor=FALSE)

# ... ... check the two slopes in post-word condition (NOT OPTIMAL; TOO MUCH LOSS OF POWER)
# ... ... ... post-word boundary, correct preview
ixPC <- which(data.sfd$bound=="post-word" & data.sfd$prev=="correct")
print(m1.lSFD.3.PC  <- lmer(log(ffd) ~ qlp + cfrq + cinsa + (1 | subj) + (1 | word), data.sfd[ixPC, ], REML=TRUE ), cor=FALSE)

# ... ... ... post-word boundary, incorrect preview
ixPI <- which(data.sfd$bound=="post-word" & data.sfd$prev=="incorrect")
print(m1.lSFD.3.PI  <- lmer(log(ffd) ~ qlp + cfrq + cinsa + (1 | subj) + (1 | word), data.sfd[ixPI, ], REML=TRUE ), cor=FALSE)

#################### FIGURES 3 (GD) and 4 (FFD) and 4a (SFD); effect plots ##########################
# log gaze
# ... adjusted gaze durations: remef() output for m1.lGD with fixed-effect
#     coefficients 2 and 3 and the selected random effects removed, back-transformed to ms
#     NOTE(review): exact meaning of `ran = list(1, 1)` is defined in remef.v0.6.7.R
data$gaze.adj <- exp(remef(m1.lGD, fix = 2:3, ran = list(1, 1)))

# ... 3-factor interaction (for log GD) --> FIGURE 3
# One panel per boundary condition, one regression line per preview condition;
# x axis reversed, y axis on a log scale.
xbreaks <- 2:12
ybreaks <- seq(150, 350, 50)
p <- ggplot(data = data, aes(x = insa, y = gaze.adj, linetype = prev)) +
  facet_grid(. ~ bound) +
  stat_smooth(method = "lm", formula = y ~ poly(x, 1), size = 1.2, colour = "black") +
  scale_linetype("Preview") +
  scale_x_reverse("Incoming saccade amplitude [letters]",
                  breaks = xbreaks, labels = as.character(xbreaks)) +
  scale_y_continuous("Target gaze duration [ms]",
                     breaks = ybreaks, labels = as.character(ybreaks), trans = "log") +
  coord_cartesian(ylim = range(ybreaks), xlim = c(1.5, 12)) +
  theme(legend.position = c(.62, .30))
(gaze.adj <- p)


################################
# log ffd 
# ... remove id- and word-related variance (shrinkage corrected) as well as landing position effect and frequency effects
# The formula of m1.lFFD starts with prev*bound, so coefficients 2 and 3 are the
# prev and bound main effects, NOT qlp and cfrq (which come after them).
# Look the coefficients up by name so that the landing-position and frequency
# effects are the ones removed, whatever the term order of the model.
ffd.names <- names(fixef(m1.lFFD))
ffd.drop <- which(grepl("^(qlp|cfrq)[^:]*$", ffd.names))   # main effects only, no interactions
stopifnot(length(ffd.drop) >= 2)
data$ffd.adj <- exp(remef(m1.lFFD, fix = ffd.drop, ran = list(1,1)))

# ... 3-factor interaction (for log FFD NOT significant in LMM) --> FIGURE 4
# One panel per boundary condition, one regression line per preview condition;
# x axis reversed, y axis on a log scale.
xbreaks <- 2:12
ybreaks <- seq(150, 350, 50)
p <- ggplot(data=data, aes(x = insa, y = ffd.adj, linetype = prev)) + facet_grid( . ~ bound)
p <- p + stat_smooth(method = "lm", formula = y ~ poly(x, 1), size = 1.2, colour = "black") + scale_linetype("Preview")
p <- p + scale_x_reverse("Incoming saccade amplitude [letters]", breaks = xbreaks, labels = as.character(xbreaks)) 
p <- p + scale_y_continuous("Target first-fixation duration [ms]", breaks = ybreaks, labels = as.character(ybreaks), trans = "log")
p <- p + coord_cartesian(ylim = c(min(ybreaks), max(ybreaks)), xlim = c(1.5,12)) + theme(legend.position = c (.62, .30))
(ffd.adj <- p)

################################
# log sfd. NOTE: the adjusted values are stored in data.sfd$sfd.adj (not in data)
# ... adjusted single-fixation durations: remef() output for m1.lSFD with fixed-effect
#     coefficients 2 and 3 and the selected random effects removed, back-transformed to ms
#     NOTE(review): exact meaning of `ran = list(1, 1)` is defined in remef.v0.6.7.R
data.sfd$sfd.adj <- exp(remef(m1.lSFD, fix = 2:3, ran = list(1, 1)))

# ... 3-factor interaction (for log SFD) --> FIGURE 4a
# Same layout as the other effect plots, but the y axis extends to 400 ms.
xbreaks <- 2:12
ybreaks <- seq(150, 400, 50)
p <- ggplot(data = data.sfd, aes(x = insa, y = sfd.adj, linetype = prev)) +
  facet_grid(. ~ bound) +
  stat_smooth(method = "lm", formula = y ~ poly(x, 1), size = 1.2, colour = "black") +
  scale_linetype("Preview") +
  scale_x_reverse("Incoming saccade amplitude [letters]",
                  breaks = xbreaks, labels = as.character(xbreaks)) +
  scale_y_continuous("Target single-fixation duration [ms]",
                     breaks = ybreaks, labels = as.character(ybreaks), trans = "log") +
  coord_cartesian(ylim = range(ybreaks), xlim = c(1.5, 12)) +
  theme(legend.position = c(.62, .28))
(sfd.adj <- p)