# ANALYSES for BoundaryN2-Experiments (Manuscript):
# EXP.1: n+2 frequency preview manipulation
# EXP.2: n+2 alternating case manipulation

# Sarah Risse, October 2011

# Start from an empty workspace (note: this wipes every object in the
# calling environment, so run this script in a fresh session).
rm(list = ls())

# SET WORKING DIRECTORY:
#setwd("")

# INPUT:
# Filtered eye-movement data frames and the corpus table used for word lengths.
ifile_em <- "n2AC_em_filtered.rda"
ifile_corpus <- "AC.DWDS.wid.rda"


# OUTPUT:


# LIBRARIES:
library(lme4)
library(reshape)
library(MASS)
library(Hmisc)

# FUNCTIONS:
# remef.R provides remef(), used below to adjust the dependent variable
# before plotting.
source("remef.R")

# LOAD DATA:
load(ifile_em)
# Objects expected after loading: em_n0, em_n1, em_n2, em_n3, em_nm1
ls()
str(em_n0)
#'data.frame':	2773 obs. of  40 variables:
# $ id  : subject identification
# $ sn  : sentence number
# $ wn  : current word number in sentence
# $ nw  : total number of words in sentence
# $ wid : unique word identification number across all sentences (same word gets same number)
# $ l0  : letter fixated in single fixation case
# $ l1  : first letter fixated in multiple fixation cases
# $ l2  : second letter fixated in multiple fixation cases
# $ l3  : third letter fixated in multiple fixation cases
# $ ngz : total number of gazes on this word
# $ cond: n+2 processing demand conditions (1) LC-LC (2) AC-LC (3) LC-AC (4) AC-AC
# $ n2bb: n+2 PREVIEW difficulty (-0.5: easy (LC); 0.5: difficult (AC))
# $ n2ab: n+2 TARGET  difficulty (-0.5: easy (LC); 0.5: difficult (AC))
# $ pvn2: n+2 preview condition (-0.5: correct preview - no change; 0.5: incorrect preview - display change)
# $ sn1:  n+1 skipping status (-0.5: fixated; 0.5: skipped)
# $ sn2:  n+2 skipping status (-0.5: fixated; 0.5: skipped)
# $ lxn1: n+1 lexical status (-0.5: function word; 0.5: content word)
# $ f   : centered log-frequency of the fixated word
# $ bf  : centered log-bigram-frequency of the fixated word
# $ tf  : centered log-trigram-frequency of the fixated word
# $ wl  : centered 1/word length of the fixated word
# $ wl1 : centered wl for the word to the left (lag-effect)
# $ f1  : centered f  for the word to the left (lag-effect)
# $ bf1 : centered bf for the word to the left (lag-effect)
# $ tf1 : centered tf for the word to the left (lag-effect)
# $ wl2 : centered wl for the word to the right (successor-effect)
# $ f2  : centered f  for the word to the right (successor-effect)
# $ bf2 : centered bf for the word to the right (successor-effect)
# $ tf2 : centered tf for the word to the right (successor-effect)
# $ wl3 : centered wl for the word two words to the left
# $ f3  : centered f  for the word two words to the left
# $ bf3 : centered bf for the word two words to the left
# $ tf3 : centered tf for the word two words to the left
# $ wl4 : centered wl for the word two words to the right
# $ f4  : centered f  for the word two words to the right
# $ bf4 : centered bf for the word two words to the right
# $ tf4 : centered tf for the word two words to the right
# $ ffd : first fixation duration
# $ gzd : gaze duration
# $ sfd : single fixation duration
# $ tvt : total viewing time
# $ prx : probability of refixation
# $ psk : probability of skipping
# $ prg : probability of regression
# $ ilp : initial landing position (letter position/word length)


#------------------------------------#
# EXPERIMENT 2:
# YOUNG ADULTS (BOUNDARY-N2-AC):
#------------------------------------#

#----------------------------------#
# MAIN ANALYSES:
# for word n, n+1, and n+2
#----------------------------------#

###############################
# WORD N
###############################

xdat <- em_n0
# LMMs REPORTED IN MAIN ANALYSIS:
# Word n: full factorial of n+1 skipping, n+1 lexical status and n+2 preview
# difficulty, with random intercepts for subject, word and sentence.

# Gaze duration
lm.0 <- lmer(
  log(gzd) ~ (sn1 + lxn1 + n2bb)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = TRUE
)

# First fixation duration
lm.1 <- lmer(
  log(ffd) ~ (sn1 + lxn1 + n2bb)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = TRUE
)

# Single fixation duration
lm.2 <- lmer(
  log(sfd) ~ (sn1 + lxn1 + n2bb)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = TRUE
)

# Model summaries without the fixed-effect correlation matrix.
print(lm.0, correlation = FALSE)
print(lm.1, correlation = FALSE)
print(lm.2, correlation = FALSE)

###############################
# WORD N+1
###############################

xdat <- em_n1
# LMMs REPORTED IN MAIN ANALYSIS:
# Word n+1: full factorial of n+1 lexical status, n+2 preview difficulty and
# n+2 target difficulty, with random intercepts for subject, word and sentence.

# Gaze duration
lm.0 <- lmer(
  log(gzd) ~ (lxn1 + n2bb + n2ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = TRUE
)

# First fixation duration
lm.1 <- lmer(
  log(ffd) ~ (lxn1 + n2bb + n2ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = TRUE
)

# Single fixation duration
lm.2 <- lmer(
  log(sfd) ~ (lxn1 + n2bb + n2ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = TRUE
)

# Model summaries without the fixed-effect correlation matrix.
print(lm.0, correlation = FALSE)
print(lm.1, correlation = FALSE)
print(lm.2, correlation = FALSE)


###############################
# WORD N+2
###############################

xdat <- em_n2
# LMMs REPORTED IN MAIN ANALYSIS:
# Word n+2: full factorial of n+1 skipping, n+1 lexical status, n+2 preview
# difficulty and n+2 target difficulty, with random intercepts for subject,
# word and sentence.

# Gaze duration
lm.0 <- lmer(
  log(gzd) ~ (sn1 + lxn1 + n2bb + n2ab)^4 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = TRUE
)

# First fixation duration
lm.1 <- lmer(
  log(ffd) ~ (sn1 + lxn1 + n2bb + n2ab)^4 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = TRUE
)

# Single fixation duration
lm.2 <- lmer(
  log(sfd) ~ (sn1 + lxn1 + n2bb + n2ab)^4 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = TRUE
)

# Model summaries without the fixed-effect correlation matrix.
print(lm.0, correlation = FALSE)
print(lm.1, correlation = FALSE)
print(lm.2, correlation = FALSE)


# FIGURE #2:
# Word N+2 GZD
##################################################################
# similar for SFD, not for FFD
xdat <- em_n2

# (1) Fit the word n+2 gaze-duration model and adjust the DV with remef()
#     (defined in remef.R, not shown here; presumably it partials out the
#     listed random effects -- no fixed effects are passed for removal).
lm1 <- lmer(
  log(gzd) ~ (sn1 + lxn1 + n2bb + n2ab)^4 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = TRUE
)

n2gzd <- lm1@frame
# Back-transform from log scale.
n2gzd$gzd.adj <- exp(remef(lm1, fix = c(), ran = c("id", "wid", "sn")))


# AGGREGATION OF DATA:
# GZD ... descriptives/ effect sizes:
n2gzd.rs <- melt(
  n2gzd,
  id.vars = c("id", "lxn1", "n2bb", "n2ab", "sn1"),
  measure.vars = "gzd.adj",
  na.rm = TRUE
)

# Turn the -0.5/0.5 contrast codes into labelled factors
# (first label = -0.5, second label = 0.5).
xLevelLabels <- list(
  lxn1 = c("FW", "CW"),
  n2bb = c("easy", "diff."),
  n2ab = c("easy", "diff."),
  sn1  = c("fixated", "skipped")
)
for (xVar in names(xLevelLabels)) {
  n2gzd.rs[[xVar]] <- as.factor(n2gzd.rs[[xVar]])
  levels(n2gzd.rs[[xVar]]) <- xLevelLabels[[xVar]]
}

# Cell mean (3 significant digits) and the half-width of a 95% interval
# (1.96 * standard error; stored under the name "SE").
xCellSummary <- function(x) {
  c(M = signif(mean(x), 3), SE = 1.96 * (sd(x) / (length(x)^.5)))
}

# Rows: n+1 skipping x n+2 preview difficulty; columns: n+2 target difficulty.
mPlot <- cast(
  n2gzd.rs,
  sn1 + n2bb ~ n2ab,
  fun.aggregate = xCellSummary,
  subset = variable == "gzd.adj"
)

# Columns 3/4 hold M/SE for the first target level, 5/6 for the second.
mPlot$fl1.lower <- mPlot[, 3] - mPlot[, 4]
mPlot$fl1.upper <- mPlot[, 3] + mPlot[, 4]

mPlot$fl2.lower <- mPlot[, 5] - mPlot[, 6]
mPlot$fl2.upper <- mPlot[, 5] + mPlot[, 6]

# Transpose so that rows index target difficulty and columns index the
# sn1 x n2bb cells; these matrices are read by the plotting code below.
data  <- t(as.matrix(mPlot[, c("easy_M", "diff._M")]))
lower <- t(as.matrix(mPlot[, c("fl1.lower", "fl2.lower")]))
upper <- t(as.matrix(mPlot[, c("fl1.upper", "fl2.upper")]))


# Open a new graphics device in a platform-independent way (the original
# quartz() call exists only on macOS, windows() only on Windows).
dev.new(width = 8, height = 5, pointsize = 11)
par(mfrow = c(1, 2), lwd = 2, cex.axis = 1.2, cex.lab = 1.2,
    mar = c(5, 4.2, 2.5, 2.5))


y.limits <- c(180, 320)

# Draw one panel of Figure 2.
#   means, lo, hi : matrices with rows = n+2 target difficulty (easy, diff.)
#                   and columns = sn1 x n2bb cells
#   cols          : the two columns (easy/difficult preview) shown in the panel
#   ylim          : y-axis limits
#   main          : panel title
xPlotGzdPanel <- function(means, lo, hi, cols, ylim, main) {
  xPos <- c(1, 2)
  plot(xPos, means[1, cols], type = "b", lty = "solid",
       pch = 21,
       bg = "black",
       ylim = ylim,
       xlim = c(0.8, 2.2),
       ylab = "Gaze duration on word n+2 [ms]",
       xlab = "N+2 preview difficulty",
       axes = FALSE)
  axis(side = 1, at = xPos, labels = c("easy", "difficult"), tick = TRUE)
  axis(side = 2)
  # Error bars for both target-difficulty rows.
  for (xRow in 1:2) {
    errbar(xPos, means[xRow, cols], yplus = hi[xRow, cols],
           yminus = lo[xRow, cols], add = TRUE, col = "black", lty = "solid",
           lwd = 1.5, xlab = NULL, ylab = NULL)
  }
  # Redraw the lines last so the point symbols sit on top of the error bars.
  lines(xPos, means[2, cols], type = "b", lty = "dashed",
        pch = 21, bg = "white")
  lines(xPos, means[1, cols], type = "b", lty = "solid",
        pch = 21, bg = "black")
  box(lty = "solid")
  title(main)
}

# Left panel: word n+1 fixated (columns 1-2 of the cell matrices).
xPlotGzdPanel(data, lower, upper, 1:2, y.limits, "N+1: fixated")

# The stray empty argument after ncol (left over from a commented-out
# bty='n') has been removed.
legend(1.3, y.limits[2], legend = c("easy", "difficult"),
       title = "N+2 target difficulty",
       lty = c("solid", "dashed"),
       pch = c(21, 21),
       pt.bg = c("black", "white"),
       merge = FALSE,
       ncol = 1,
       bg = "white",
       box.col = "white")


# Right panel: word n+1 skipped (columns 3-4 of the cell matrices).
xPlotGzdPanel(data, lower, upper, 3:4, y.limits, "N+1: skipped")


#----------------------------------#
# NOT REPORTED ...
# SUPPLEMENTARY ANALYSES:
#----------------------------------#

#-------------------------------------------------------------#
# CHECK INFLUENCE OF LAST FIXATION LOCATION PRIOR TO BOUNDARY:
# does this affect preview effects of word n+2? 
#-------------------------------------------------------------#

# Distribution of the number of gazes on word n, and the percentage of cases
# with four or more.
unique(em_n0$ngz)
length(which(em_n0$ngz >= 4)) / nrow(em_n0) * 100
# only 0.11 % of cases (3 out of 2773) show 4 or more fixations on word n
# in word-based dataframe, the fixation position (letter l0,l1,l2,l3) is
# only stored for up to three fixations
# however, the error if we use l3 for the cases with 4 or more fixations
# is quite small... (and the results do not change significantly if we
# exclude the data >= 4 ngz)


xSingleFix <- which(em_n0$ngz == 1)
xTwoFix    <- which(em_n0$ngz == 2)
xThreeFix  <- which(em_n0$ngz == 3)
xMoreFix   <- which(em_n0$ngz > 3)

# tmp: right-most fixated letter on word n, taken over the stored fixation
# positions for that case.
em_n0$tmp <- NA
em_n0$tmp[xSingleFix] <- em_n0$l0[xSingleFix]
# Two fixations: compare the first and SECOND fixated letter (l1, l2).
# The original used wl2 here, which is the centered 1/word length of the
# next word, not a letter position.
em_n0$tmp[xTwoFix] <- pmax(em_n0$l1[xTwoFix], em_n0$l2[xTwoFix])
em_n0$tmp[xThreeFix] <- pmax(em_n0$l1[xThreeFix], em_n0$l2[xThreeFix],
                             em_n0$l3[xThreeFix])
# Four or more fixations: only l1-l3 are stored, so use those (see note above).
em_n0$tmp[xMoreFix] <- pmax(em_n0$l1[xMoreFix], em_n0$l2[xMoreFix],
                            em_n0$l3[xMoreFix])

# Look up column l of the corpus table for each row via a combined
# sentence/word key (sn * 100 + wn).
# NOTE(review): presumably n2.dwds$l is word length in letters -- confirm.
load(ifile_corpus)
xSnWn_em <- 1e2 * em_n0$sn + em_n0$wn
xSnWn_cp <- 1e2 * n2.dwds$sn + n2.dwds$wn

xCorpusToEM <- match(xSnWn_em, xSnWn_cp)
em_n0$wll <- n2.dwds$l[xCorpusToEM]


# (1) LAST FIXATION POSITION PRIOR TO BOUNDARY:

# llbb: wll minus the right-most fixated letter, plus one.
# clbb: llbb centered on its mean.
# flbb: median split of llbb (-0.5: at or below the median; 0.5: above).
em_n0$llbb <- em_n0$wll - em_n0$tmp + 1
em_n0$clbb <- scale(em_n0$llbb, scale = FALSE)
em_n0$flbb <- ifelse(em_n0$llbb <= median(em_n0$llbb, na.rm = TRUE), -0.5, 0.5)

# The covariates are copied row by row, so the three data frames must have
# the same number of rows.
# NOTE(review): this also assumes identical row ORDER across em_n0, em_n1
# and em_n2 -- confirm against the preprocessing script.
stopifnot(
  nrow(em_n1) == nrow(em_n0),
  nrow(em_n2) == nrow(em_n0)
)

em_n1$llbb <- em_n0$llbb
em_n1$clbb <- em_n0$clbb
em_n1$flbb <- em_n0$flbb

em_n2$llbb <- em_n0$llbb
em_n2$clbb <- em_n0$clbb
em_n2$flbb <- em_n0$flbb


# (2) SINGLE vs. MULTIPLE FIXATION CASES:

# ffc: -0.5 for exactly one gaze on word n, 0.5 otherwise.
em_n0$ffc <- ifelse(em_n0$ngz == 1, -0.5, 0.5)
em_n1$ffc <- em_n0$ffc
em_n2$ffc <- em_n0$ffc


###############################
# WORD N
###############################

xdat <- em_n0

# All models in this section are fitted with ML (REML = FALSE).

# Baseline: gaze duration without the boundary-distance factor.
lm0 <- lmer(
  log(gzd) ~ (sn1 + lxn1 + n2bb)^2 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = FALSE
)
print(lm0, correlation = FALSE)

# Boundary-distance split (flbb) crossed with all baseline terms: GZD.
lm1 <- lmer(
  log(gzd) ~ flbb * (sn1 + lxn1 + n2bb)^2 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = FALSE
)
print(lm1, correlation = FALSE)

# Same fixed-effect structure for SFD.
lm2 <- lmer(
  log(sfd) ~ flbb * (sn1 + lxn1 + n2bb)^2 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = FALSE
)
print(lm2, correlation = FALSE)

# Same fixed-effect structure for FFD.
lm3 <- lmer(
  log(ffd) ~ flbb * (sn1 + lxn1 + n2bb)^2 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = FALSE
)
print(lm3, correlation = FALSE)

# GZD with boundary distance x single/multiple-fixation case (ffc).
lm5 <- lmer(
  log(gzd) ~ (flbb * ffc) * (sn1 + lxn1 + n2bb)^2 +
    (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = FALSE
)
print(lm5, correlation = FALSE)


###############################
# WORD N+1
###############################

xdat <- em_n1

# All models in this section are fitted with ML (REML = FALSE).

# Baseline: gaze duration without the boundary-distance factor.
lm0 <- lmer(
  log(gzd) ~ (lxn1 + n2bb + n2ab)^2 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = FALSE
)
print(lm0, correlation = FALSE)

# Boundary-distance split (flbb) crossed with all baseline terms: GZD,
# followed by a comparison against the baseline model.
lm1 <- lmer(
  log(gzd) ~ flbb * (lxn1 + n2bb + n2ab)^2 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = FALSE
)
print(lm1, correlation = FALSE)
anova(lm1, lm0)

# Same fixed-effect structure for SFD.
lm2 <- lmer(
  log(sfd) ~ flbb * (lxn1 + n2bb + n2ab)^2 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = FALSE
)
print(lm2, correlation = FALSE)

# Same fixed-effect structure for FFD.
lm3 <- lmer(
  log(ffd) ~ flbb * (lxn1 + n2bb + n2ab)^2 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = FALSE
)
print(lm3, correlation = FALSE)

# GZD with boundary distance x single/multiple-fixation case (ffc).
lm5 <- lmer(
  log(gzd) ~ (flbb * ffc) * (lxn1 + n2bb + n2ab)^2 +
    (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = FALSE
)
print(lm5, correlation = FALSE)


###############################
# WORD N+2
###############################

xdat <- em_n2

# All models in this section are fitted with ML (REML = FALSE).

# Baseline: gaze duration without the boundary-distance factor.
lm0 <- lmer(
  log(gzd) ~ (sn1 + lxn1 + n2bb + n2ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = FALSE
)
print(lm0, correlation = FALSE)

# Boundary-distance split (flbb) crossed with all baseline terms: GZD,
# followed by a comparison against the baseline model.
lm1 <- lmer(
  log(gzd) ~ flbb * (sn1 + lxn1 + n2bb + n2ab)^3 +
    (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = FALSE
)
print(lm1, correlation = FALSE)
anova(lm1, lm0)

# Same fixed-effect structure for SFD.
lm2 <- lmer(
  log(sfd) ~ flbb * (sn1 + lxn1 + n2bb + n2ab)^3 +
    (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = FALSE
)
print(lm2, correlation = FALSE)

# Same fixed-effect structure for FFD (this model feeds the plot below).
lm3 <- lmer(
  log(ffd) ~ flbb * (sn1 + lxn1 + n2bb + n2ab)^3 +
    (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = FALSE
)
print(lm3, correlation = FALSE)

# GZD with boundary distance x single/multiple-fixation case (ffc).
lm5 <- lmer(
  log(gzd) ~ (flbb * ffc) * (sn1 + lxn1 + n2bb + n2ab)^3 +
    (1 | id) + (1 | wid) + (1 | sn),
  data = xdat,
  REML = FALSE
)
print(lm5, correlation = FALSE)


# PLOT AFTER REMEF

source("remef.R")

# Adjust the word n+2 FFD model (lm3) with remef(): fixed-effect indices
# 1-21 and 23-30 and all three random effects are passed for removal, i.e.
# every index except 22.
# NOTE(review): which term sits at index 22 depends on the coefficient
# order of lm3 -- verify before interpreting the figure.
dat <- lm3@frame
dat$ffd.adj <- exp(remef(lm3, fix = c(1:21, 23:30), ran = c("sn", "wid", "id")))


# descriptives/ effect sizes:
n2.rs <- melt(
  dat,
  id.vars = c("id", "lxn1", "n2bb", "n2ab", "flbb", "sn1"),
  measure.vars = "ffd.adj",
  na.rm = TRUE
)

# Cell mean (3 significant digits) and the half-width of a 95% interval
# (1.96 * standard error; stored under the name "SE").
xCellSummary <- function(x) {
  c(M = signif(mean(x), 3), SE = 1.96 * (sd(x) / (length(x)^.5)))
}

# Rows: boundary-distance split x n+1 skipping; columns: n+2 preview
# difficulty (still coded -0.5 / 0.5 here).
mPlot <- cast(
  n2.rs,
  flbb + sn1 ~ n2bb,
  fun.aggregate = xCellSummary,
  subset = variable == "ffd.adj"
)


# Columns 3/4 hold M/SE for the first preview level, 5/6 for the second.
mPlot$fl1.lower <- mPlot[, 3] - mPlot[, 4]
mPlot$fl1.upper <- mPlot[, 3] + mPlot[, 4]

mPlot$fl2.lower <- mPlot[, 5] - mPlot[, 6]
mPlot$fl2.upper <- mPlot[, 5] + mPlot[, 6]

# Transpose so that rows index preview difficulty and columns index the
# flbb x sn1 cells; these matrices are read by the plotting code below.
data  <- t(as.matrix(mPlot[, c("-0.5_M", "0.5_M")]))
lower <- t(as.matrix(mPlot[, c("fl1.lower", "fl2.lower")]))
upper <- t(as.matrix(mPlot[, c("fl1.upper", "fl2.upper")]))


# Open a new graphics device in a platform-independent way (the original
# windows() call exists only on Windows, while the earlier figure used the
# macOS-only quartz()).
dev.new(width = 8, height = 5, pointsize = 11)
par(mfrow = c(1, 2), lwd = 2, cex.axis = 1.2, cex.lab = 1.2,
    mar = c(5, 4.2, 2.5, 2.5))

# Draw one panel of the supplementary FFD figure.
#   means, lo, hi : matrices with rows = n+2 preview difficulty (-0.5, 0.5)
#                   and columns = flbb x sn1 cells
#   cols          : the two columns (n+1 fixated/skipped) shown in the panel
#   ylim          : y-axis limits
#   main          : panel title
xPlotFfdPanel <- function(means, lo, hi, cols, ylim, main) {
  xPos <- c(1, 2)
  plot(xPos, means[1, cols], type = "b", lty = "solid",
       pch = 21,
       bg = "black",
       ylim = ylim,
       xlim = c(0.8, 2.2),
       ylab = "First fixation duration on word n+2 [ms]",
       xlab = "N+1 skipping",
       axes = FALSE)
  axis(side = 1, at = xPos, labels = c("fixated", "skipped"), tick = TRUE)
  axis(side = 2)
  # Error bars for both preview-difficulty rows.
  for (xRow in 1:2) {
    errbar(xPos, means[xRow, cols], yplus = hi[xRow, cols],
           yminus = lo[xRow, cols], add = TRUE, col = "black", lty = "solid",
           lwd = 1.5)
  }
  # Redraw the lines last so the point symbols sit on top of the error bars.
  lines(xPos, means[2, cols], type = "b", lty = "dashed",
        pch = 21, bg = "white")
  lines(xPos, means[1, cols], type = "b", lty = "solid",
        pch = 21, bg = "black")
  box(lty = "solid")
  title(main)
}

# NOTE(review): columns 1-2 are the flbb = -0.5 cells, and flbb = -0.5 is
# assigned where llbb <= median(llbb). If llbb grows with the distance
# between the last fixation and the boundary, these cells are the NEAR cases
# and the two panel titles below are swapped -- confirm before use.
# NOTE(review): the y range (about 0.8-1.3) does not look like milliseconds
# although the axis label says [ms] -- confirm the unit of ffd.adj.

n1 <- c(1:2)

y.limits <- c(0.9, 1.3)
xPlotFfdPanel(data, lower, upper, n1, y.limits, "FixDist to boundary: far")

# The stray empty argument after ncol (left over from a commented-out
# bty='n') has been removed.
legend(0.9, 1.3, legend = c("easy", "difficult"),
       title = "N+2 preview difficulty",
       lty = c("solid", "dashed"),
       pch = c(21, 21),
       pt.bg = c("black", "white"),
       merge = FALSE,
       ncol = 1,
       bg = "white",
       box.col = "white")


n1 <- c(3:4)

y.limits <- c(0.8, 1.3)
xPlotFfdPanel(data, lower, upper, n1, y.limits, "FixDist to boundary: near")