# Yan, Risse, Zhou, & Kliegl (2010) reanalyses of N+1 and N+2 semantic preview benefits # R. Kliegl, 05-03-2010 # Redesigned and old script, removed regressions, included phonological condition # R. Kliegl, 08-08-2010 # Exclude regression-origin fixations on N and N+1 (TW) # R. Kliegl, 09-08-2010 # Analyze refixation probs # R. Kliegl, 26-08-2010 library(lme4) library(ggplot2) ##################################### # Data Set 1 for TW in Position N+1 # ##################################### rm(list=ls()) load("chs_np1_09.r2.rda") # Specify covariate and dependent variable: GD, FFD, or SFD (comes with reduction of nobs) COV <- "sfd_n0" DV <- "n_fix" # "ffd" "sfd" "gd" "n_fix" # Set up factors target1 <- target target1$id <- factor(target1$id) target1$sn <- factor(target1$sn) target1$cond <- factor(target1$cond, levels=c( "unr", "sem", "idt", "ort", "pho")) # N of fixations (recoded to 1, 2+) NOTE: 0 skipped TW cases should be added to the dataframe! target1$n_fix <- ifelse(is.na(target1$ffd ), 0,ifelse(target1$gd - target1$ffd == 0, 1, 2)) target1$n_fix_n0 <- ifelse(is.na(target1$ffd_n0), 0,ifelse(target1$gd_n0 - target1$ffd_n0 == 0, 1, 2)) # Refixations (binary) TO BE DONE #target1$rfx_n0 <- ifelse(target1$n_fix_n0 > 1, 1, 0) #target1$rfx <- ifelse(target1$n_fix > 1, 1, 0) # Filter valid N+1 cases with preboundary fixation and no regression from the two words ix1 <- which(target1$ffd > 60 & target1$ffd < 600 & target1$out_sac_len_n0 > 0 & target1$out_sac_len > 0) tar1 <- target1[ ix1, c("id", "sn", "cond", "n_fix_n0", "ffd_n0", "gd_n0", "n_fix", "ffd", "gd" )] # Specify covariate if (COV == "sfd_n0") { print("COV: Selecting single fixation durations") tar1 <- tar1[which(tar1$n_fix_n0 == 1), ] M.cov <- mean(log(tar1$ffd_n0)) tar1$cov <- scale(log(tar1$ffd_n0), center=TRUE, scale=FALSE) x_breaks = seq(150, 400, 50) xlim=c(150, 400) xlab <- "Preview single fixation duration [ms]" } else {if (COV == "ffd_n0"){ print("COV: Selecting first fixation durations") M.cov <- mean(log(tar1$ffd_n0)) tar1$cov <- scale(log(tar1$ffd_n0), center=TRUE, scale=FALSE) x_breaks = seq(150, 400, 50) xlim=c(150, 400) xlab <- "Preview first fixation duration [ms]" } else {if (COV == "gd_n0"){ print("COV: Selecting gaze durations") M.cov <- mean(log(tar1$gd_n0)) tar1$cov <- scale(log(tar1$gd_n0), center=TRUE, scale=FALSE) x_breaks = seq(150, 600, 50) xlim=c(150, 600) xlab <- "Preview gaze duration [ms]" }}} # Specify dependent variable if (DV == "sfd") { print("DV: Selecting single fixation durations") tar1 <- tar1[which(tar1$n_fix==1), ] tar1$dv <- log(tar1$ffd) y_breaks = seq(175, 300, 25) ylim= c(175, 300) ylab <- "Target single fixation duration [ms]" } else {if (DV == "ffd") { print("DV: Selecting first fixation durations") tar1$dv <- log(tar1$ffd) y_breaks = seq(175, 300, 25) ylim= c(175, 300) ylab <- "Target first fixation duration [ms]" } else {if (DV == "gd") { print("DV: Selecting gaze durations") tar1$dv <- log(tar1$gd) y_breaks = seq(200, 375, 25) ylim= c(200, 375) ylab <- "Target gaze duration [ms]" } else {if (DV == "n_fix") { print("DV: Prob of refixation") tar1$dv <- ifelse(tar1$n_fix==1, 0, 1) y_breaks = seq(0, 0.75, .25) ylim= c(0, 0.75) } }}} # ... LMM # ... reference is unrelated contrasts(tar1$cond) <- contr.treatment(5, base=1 ) print(m1 <- lmer(dv ~ cond*cov + (1 | id) + (1 | sn), data=tar1), cor=FALSE) # ... reference is identical contrasts(tar1$cond) <- contr.treatment(5, base=3 ) print(m1.identical <- lmer(dv ~ cond*cov + (1 | id) + (1 | sn), data=tar1), cor=FALSE) tar1$prd <- fitted(m1) tar1$res <- resid(m1) u.id=data.frame(ranef(m1)$id) names(u.id)[1] <- "u.id" tar1 <- merge(tar1, u.id, by.x="id", by.y=0) u.sn=data.frame(ranef(m1)$sn) names(u.sn)[1] <- "u.sn" tar1 <- merge(tar1, u.sn, by.x="sn", by.y=0) # ... ... remove id and sn related variance (shrinkage corrected) tar1$dv.adj <- tar1$dv - tar1$u.id - tar1$u.sn # ... ... check cast(data=tar1,value='dv',cond ~., function(x) c(M=mean(x), SE=sd(x)/sqrt(length(x)), N=length(x))) cast(data=tar1,value='dv.adj',cond ~., function(x) c(M=mean(x), SE=sd(x)/sqrt(length(x)), N=length(x))) # Adjust covariate for between-id and between-sn differences (again: for removal subject/item correlations) # ... remove between-id differences tar1 <- ddply(tar1, .(id), transform, cov.id = cov - mean(cov) + M.cov) # ... remove between-sn differences from id-adjusted covariate M <- mean(tapply(tar1$cov, tar1$sn, mean)) # GM of sentences' mean cov tar1 <- ddply(tar1, .(sn), transform, cov.adj = cov.id - mean(cov.id) + M.cov) # Checks # ... adjustment for id and sn variance in dv -- VERY, VERY SMALL EFFECT cast(data=tar1,value='dv',cond ~., function(x) c(M=mean(x), SE=sd(x)/sqrt(length(x)), N=length(x))) cast(data=tar1,value='dv.adj',cond ~., function(x) c(M=mean(x), SE=sd(x)/sqrt(length(x)), N=length(x))) # ... adjustment for id and sn variance in cov -- BASICALLY NO EFFECT cast(data=tar1,value='cov',cond ~., function(x) c(M=mean(x), SE=sd(x)/sqrt(length(x)), N=length(x))) cast(data=tar1,value='cov.id',cond ~., function(x) c(M=mean(x), SE=sd(x)/sqrt(length(x)), N=length(x))) cast(data=tar1,value='cov.adj',cond ~., function(x) c(M=mean(x), SE=sd(x)/sqrt(length(x)), N=length(x))) # Save (input for DataSet3.1.R; joint stuff) save(m1, tar1, file="dataset1.V5.rda") ################################################################## # Generate FIGURE # IMPORTANT NOTE: All statistics are done in log-log space. # In the figure we map everything back into original scale, # but plot in log-log space. Cool # Three DVs: # ... exp(dv) = sfd or ffd or gd (see "DV" at beginning of script) # ... exp(dv.adj) = exp(dv) w/ removal of # ... ... between-id and between-sn variance (shrinkage-corrected) # Three Covariates: # ... gd_no = preboundary gaze duration (note: cov is centered) # ... exp(cov.id) = preboundary gaze duration w/o between-id differences # ... exp(cov.adj) = preboundary gaze duration w/o between-sn differences ################################################################### tar1$Preview <- tar1$cond levels(tar1$Preview) <- c("unrelated", "semantic", "identical", "orthographic", "phonological") p1.obs <- qplot(data = tar1, x=ffd_n0, y=dv, group=Preview, geom="smooth", method=lm, formula=y ~ poly(x, 1), se=F, colour=Preview, linetype = Preview, size = Preview) + scale_colour_manual(values=c("black", "black", "black", "black", "black")) + scale_size_manual(values=c(1.5, 1.5, 1.5, 0.75, 0.75)) + scale_linetype_manual(values=c(1, 2, 3, 4, 5)) + scale_x_log(xlab, breaks = x_breaks, labels = as.character(x_breaks)) + coord_cartesian(xlim=xlim, ylim= ylim) + geom_vline(xintercept=exp(M.cov), alpha=0.5) + theme_bw() + opts(title="Observed", legend.position = "right") p1.adj <- qplot(data = tar1, x=ffd_n0, y=dv, group=Preview, geom="smooth", method=lm, formula=y ~ poly(x, 1), se=F, colour=Preview, linetype = Preview, size = Preview) + scale_colour_manual(values=c("black", "black", "black", "black", "black")) + scale_size_manual(values=c(1.5, 1.5, 1.5, 0.75, 0.75)) + scale_linetype_manual(values=c(1, 2, 3, 4, 5)) + scale_x_log(xlab, breaks = x_breaks, labels = as.character(x_breaks)) + coord_cartesian(xlim=xlim, ylim= ylim) + geom_vline(xintercept=exp(M.cov), alpha=0.5) + theme_bw() + opts(title="Adjusted", legend.position = "right") # Combine the plots vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y) pdf ("Fig1.pdf", paper="a4") grid.newpage() pushViewport(viewport(layout = grid.layout(2,1))) print(p1.obs, vp=vplayout(1,1)) print(p1.adj, vp=vplayout(2,1)) dev.off () # Annotations for preview types #annotation <- data.frame(cond=levels(tar1$cond), x = rep(155, 5), y = c( 249, 216, 233, 208, 240) ) #(p1 <- p1 + geom_text(aes(x=x, y=y, label=cond), data=annotation, size=4, hjust=0, vjust=0))