# Yan, Risse, Zhou, & Kliegl (2010) reanalyses of N+1 and N+2 semantic preview benefits
# R. Kliegl, 05-03-2010

# Redesigned and old script, removed regressions, included phonological condition
# R. Kliegl, 08-08-2010

# Exclude regression-origin fixations on N and N+1 (TW)
# R. Kliegl, 09-08-2010

# Analyze refixation probs
# R. Kliegl, 26-08-2010

library(lme4)
library(ggplot2)

#####################################
# Data Set 1 for TW in Position N+1 #
#####################################

rm(list=ls())
load("chs_np1_09.r2.rda")

# Specify covariate and dependent variable: GD, FFD, or SFD (comes with reduction of nobs)
COV <- "sfd_n0"
DV  <- "n_fix"  # "ffd" "sfd" "gd" "n_fix"

# Set up factors
target1      <- target
target1$id   <- factor(target1$id)
target1$sn   <- factor(target1$sn)
target1$cond <- factor(target1$cond, levels=c( "unr", "sem", "idt", "ort", "pho"))

# N of fixations (recoded to 1, 2+)  NOTE: 0 skipped TW cases should be added to the dataframe!
target1$n_fix    <- ifelse(is.na(target1$ffd   ), 0,ifelse(target1$gd    - target1$ffd    == 0, 1, 2))
target1$n_fix_n0 <- ifelse(is.na(target1$ffd_n0), 0,ifelse(target1$gd_n0 - target1$ffd_n0 == 0, 1, 2))

# Refixations (binary)  TO BE DONE
#target1$rfx_n0   <- ifelse(target1$n_fix_n0 > 1, 1, 0)
#target1$rfx      <- ifelse(target1$n_fix > 1, 1, 0)

# Filter valid N+1 cases with preboundary fixation and no regression from the two words
ix1 <- which(target1$ffd > 60 & target1$ffd < 600 
	       & target1$out_sac_len_n0 > 0 & target1$out_sac_len > 0) 

tar1 <- target1[ ix1, c("id", "sn", "cond", "n_fix_n0", "ffd_n0", "gd_n0",
                                            "n_fix",    "ffd",    "gd"     )]

# Specify covariate
if (COV == "sfd_n0") {
	print("COV: Selecting single fixation durations")
	tar1 <- tar1[which(tar1$n_fix_n0 == 1), ]
	M.cov <- mean(log(tar1$ffd_n0)) 
	tar1$cov <- scale(log(tar1$ffd_n0), center=TRUE, scale=FALSE) 
	x_breaks = seq(150, 400, 50)
	xlim=c(150, 400)
	xlab <- "Preview single fixation duration [ms]"
} else {if (COV == "ffd_n0"){
	print("COV: Selecting first fixation durations")
	M.cov <- mean(log(tar1$ffd_n0)) 
	tar1$cov <- scale(log(tar1$ffd_n0), center=TRUE, scale=FALSE) 
	x_breaks = seq(150, 400, 50)
	xlim=c(150, 400)
	xlab <- "Preview first fixation duration [ms]"
} else {if (COV == "gd_n0"){
	print("COV: Selecting gaze durations")
	M.cov <- mean(log(tar1$gd_n0)) 
	tar1$cov <- scale(log(tar1$gd_n0), center=TRUE, scale=FALSE) 
	x_breaks = seq(150, 600, 50)
	xlim=c(150, 600)
	xlab <- "Preview gaze duration [ms]"	
}}}

# Specify dependent variable
if (DV == "sfd") {
	print("DV: Selecting single fixation durations")
	tar1 <- tar1[which(tar1$n_fix==1), ]
	tar1$dv <- log(tar1$ffd)
	y_breaks = seq(175, 300, 25)
	ylim= c(175, 300)
	ylab <- "Target single fixation duration [ms]"
} else {if (DV == "ffd") {
	print("DV: Selecting first fixation durations")
	tar1$dv <- log(tar1$ffd)
	y_breaks = seq(175, 300, 25)
	ylim= c(175, 300)
	ylab <- "Target first fixation duration [ms]"
} else {if (DV == "gd") {
	print("DV: Selecting gaze durations")
	tar1$dv <- log(tar1$gd)
	y_breaks = seq(200, 375, 25)
	ylim= c(200, 375)
	ylab <- "Target gaze duration [ms]"
} else {if (DV == "n_fix") {
	print("DV: Prob of refixation")
	tar1$dv <- ifelse(tar1$n_fix==1, 0, 1)
	y_breaks = seq(0, 0.75, .25)
	ylim= c(0, 0.75)
}
}}}

# ... LMM
# ... reference is unrelated
contrasts(tar1$cond) <- contr.treatment(5, base=1 )
print(m1  <- lmer(dv ~ cond*cov + (1 | id) + (1 | sn), data=tar1), cor=FALSE)

# ... reference is identical
contrasts(tar1$cond) <- contr.treatment(5, base=3 )
print(m1.identical  <- lmer(dv ~ cond*cov + (1 | id) + (1 | sn), data=tar1), cor=FALSE)

tar1$prd <- fitted(m1)
tar1$res <- resid(m1)

u.id=data.frame(ranef(m1)$id)
names(u.id)[1] <- "u.id"
tar1 <- merge(tar1, u.id, by.x="id", by.y=0)

u.sn=data.frame(ranef(m1)$sn)
names(u.sn)[1] <- "u.sn"
tar1 <- merge(tar1, u.sn, by.x="sn", by.y=0)

# ... ... remove id  and sn related variance (shrinkage corrected)
tar1$dv.adj <- tar1$dv - tar1$u.id - tar1$u.sn

# ... ... check
cast(data=tar1,value='dv',cond ~., function(x) c(M=mean(x), SE=sd(x)/sqrt(length(x)), N=length(x)))
cast(data=tar1,value='dv.adj',cond ~., function(x) c(M=mean(x), SE=sd(x)/sqrt(length(x)), N=length(x)))

# Adjust covariate for between-id and between-sn differences (again: for removal subject/item correlations)
# ... remove between-id differences
tar1 <- ddply(tar1, .(id), transform, cov.id = cov - mean(cov) + M.cov)

# ... remove between-sn differences from id-adjusted covariate
M <- mean(tapply(tar1$cov, tar1$sn, mean))  # GM of sentences' mean cov
tar1 <- ddply(tar1, .(sn), transform, cov.adj = cov.id - mean(cov.id) + M.cov)

# Checks 
# ... adjustment for id and sn variance in dv -- VERY, VERY SMALL EFFECT
cast(data=tar1,value='dv',cond ~., function(x) c(M=mean(x), SE=sd(x)/sqrt(length(x)), N=length(x)))
cast(data=tar1,value='dv.adj',cond ~., function(x) c(M=mean(x), SE=sd(x)/sqrt(length(x)), N=length(x)))

# ... adjustment for id and sn variance in cov -- BASICALLY NO EFFECT
cast(data=tar1,value='cov',cond ~., function(x) c(M=mean(x), SE=sd(x)/sqrt(length(x)), N=length(x)))
cast(data=tar1,value='cov.id',cond ~., function(x) c(M=mean(x), SE=sd(x)/sqrt(length(x)), N=length(x)))
cast(data=tar1,value='cov.adj',cond ~., function(x) c(M=mean(x), SE=sd(x)/sqrt(length(x)), N=length(x)))

# Save (input for DataSet3.1.R; joint stuff)
save(m1, tar1, file="dataset1.V5.rda")

##################################################################
#  Generate FIGURE
# IMPORTANT NOTE: All statistics are done in log-log space. 
# In the figure we map everything back into original scale,
# but plot in log-log space. Cool
# Three DVs:  
# ... exp(dv) = sfd or ffd or gd (see "DV" at beginning of script)
# ... exp(dv.adj) = exp(dv) w/ removal of
# ... ... between-id and between-sn variance (shrinkage-corrected) 
# Three Covariates:
# ... gd_no = preboundary gaze duration (note: cov is centered)
# ... exp(cov.id) = preboundary gaze duration w/o between-id differences
# ... exp(cov.adj) = preboundary gaze duration w/o between-sn differences
###################################################################


tar1$Preview <- tar1$cond
levels(tar1$Preview) <- c("unrelated", "semantic", "identical", "orthographic",  "phonological")

p1.obs <- qplot(data = tar1, x=ffd_n0, y=dv, group=Preview, 
	        geom="smooth", method=lm, formula=y ~ poly(x, 1), se=F, 
            colour=Preview, linetype = Preview, size = Preview) + 
			scale_colour_manual(values=c("black", "black", "black", "black", "black")) +
			scale_size_manual(values=c(1.5, 1.5, 1.5, 0.75, 0.75)) +
			scale_linetype_manual(values=c(1, 2, 3, 4, 5)) +
scale_x_log(xlab, breaks = x_breaks, labels = as.character(x_breaks)) +
coord_cartesian(xlim=xlim, ylim= ylim) + 
geom_vline(xintercept=exp(M.cov), alpha=0.5) +
theme_bw() + opts(title="Observed", legend.position = "right") 


p1.adj <- qplot(data = tar1, x=ffd_n0, y=dv, group=Preview, 
	        geom="smooth", method=lm, formula=y ~ poly(x, 1), se=F, 
            colour=Preview, linetype = Preview, size = Preview) + 
			scale_colour_manual(values=c("black", "black", "black", "black", "black")) +
			scale_size_manual(values=c(1.5, 1.5, 1.5, 0.75, 0.75)) +
			scale_linetype_manual(values=c(1, 2, 3, 4, 5)) +
scale_x_log(xlab, breaks = x_breaks, labels = as.character(x_breaks)) +
coord_cartesian(xlim=xlim, ylim= ylim) + 
geom_vline(xintercept=exp(M.cov), alpha=0.5) +
theme_bw() + opts(title="Adjusted", legend.position = "right") 


# Combine the plots
vplayout <- function(x, y) 
viewport(layout.pos.row = x, layout.pos.col = y) 

pdf ("Fig1.pdf", paper="a4")
grid.newpage() 
pushViewport(viewport(layout = grid.layout(2,1))) 

print(p1.obs, vp=vplayout(1,1)) 
print(p1.adj, vp=vplayout(2,1)) 
dev.off ()

# Annotations for preview types
#annotation <- data.frame(cond=levels(tar1$cond), x = rep(155, 5), y = c( 249, 216, 233, 208, 240) )
#(p1 <- p1 + geom_text(aes(x=x, y=y, label=cond), data=annotation, size=4, hjust=0, vjust=0))