# ANALYSES for BoundaryN2-Experiments (Manuscript):
#   EXP.1: n+2 frequency preview manipulation
#   EXP.2: n+2 alternating case manipulation
# Sarah Risse, October 2011

# Start from a clean workspace: drops all visible objects in the
# global environment before anything is loaded.
rm(list = ls())

# SET WORKING DIRECTORY:
# setwd("")

# INPUT:
ifile_em <- "n2AC_em_filtered.rda"
ifile_corpus <- "AC.DWDS.wid.rda"

# OUTPUT:

# LIBRARIES:
library(lme4)
library(reshape)
library(MASS)
library(Hmisc)

# FUNCTIONS:
source("remef.R")

# LOAD DATA:
load(ifile_em)
ls()
# em_n0, em_n1, em_n2, em_n3, em_nm1
str(em_n0)
# 'data.frame': 2773 obs. of 40 variables:
#  $ id  : subject identification
#  $ sn  : sentence number
#  $ wn  : current word number in sentence
#  $ nw  : total number of words in sentence
#  $ wid : unique word identification number across all sentences
#          (same word gets same number)
#  $ l0  : letter fixated in single fixation case
#  $ l1  : first letter fixated in multiple fixation cases
#  $ l2  : second letter fixated in multiple fixation cases
#  $ l3  : third letter fixated in multiple fixation cases
#  $ ngz : total number of gazes on this word
#  $ cond: n+2 processing demand conditions
#          (1) LC-LC (2) AC-LC (3) LC-AC (4) AC-AC
#  $ n2bb: n+2 PREVIEW difficulty (-0.5: easy (LC); 0.5: difficult (AC))
#  $ n2ab: n+2 TARGET difficulty (-0.5: easy (LC); 0.5: difficult (AC))
#  $ pvn2: n+2 preview condition (-0.5: correct preview - no change;
#          0.5: incorrect preview - display change)
#  $ sn1 : n+1 skipping status (-0.5: fixated; 0.5: skipped)
#  $ sn2 : n+2 skipping status (-0.5: fixated; 0.5: skipped)
#  $ lxn1: n+1 lexical status (-0.5: function word; 0.5: content word)
#  $ f   : centered log-frequency of the fixated word
#  $ bf  : centered log-bigram-frequency of the fixated word
#  $ tf  : centered log-trigram-frequency of the fixated word
#  $ wl  : centered 1/word length of the fixated word
#  $ wl1 : centered wl for the word to the left (lag-effect)
#  $ f1  : centered f for the word to the left (lag-effect)
#  $ bf1 : centered bf for the word to the left (lag-effect)
#  $ tf1 : centered tf for the word to
#          the left (lag-effect)
#  $ wl2 : centered wl for the word to the right (successor-effect)
#  $ f2  : centered f for the word to the right (successor-effect)
#  $ bf2 : centered bf for the word to the right (successor-effect)
#  $ tf2 : centered tf for the word to the right (successor-effect)
#  $ wl3 : centered wl for the word two words to the left
#  $ f3  : centered f for the word two words to the left
#  $ bf3 : centered bf for the word two words to the left
#  $ tf3 : centered tf for the word two words to the left
#  $ wl4 : centered wl for the word two words to the right
#  $ f4  : centered f for the word two words to the right
#  $ bf4 : centered bf for the word two words to the right
#  $ tf4 : centered tf for the word two words to the right
#  $ ffd : first fixation duration
#  $ gzd : gaze duration
#  $ sfd : single fixation duration
#  $ tvt : total viewing time
#  $ prx : probability of refixation
#  $ psk : probability of skipping
#  $ prg : probability of regression
#  $ ilp : initial landing position (letter position/word length)

#------------------------------------#
# EXPERIMENT 2:
# YOUNG ADULTS (BOUNDARY-N2-AC):
#------------------------------------#

#----------------------------------#
# MAIN ANALYSES:
# for word n, n+1, and n+2
#----------------------------------#
# Each section fits the same fixed-effects structure to three log-transformed
# duration measures (gzd, ffd, sfd) with random intercepts for subject (id),
# word (wid) and sentence (sn). The objects lm.0 / lm.1 / lm.2 are
# overwritten from one section to the next.

###############################
# WORD N
###############################
xdat <- em_n0

# LMMs REPORTED IN MAIN ANALYSIS:
lm.0 <- lmer(
  log(gzd) ~ (sn1 + lxn1 + n2bb)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
lm.1 <- lmer(
  log(ffd) ~ (sn1 + lxn1 + n2bb)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
lm.2 <- lmer(
  log(sfd) ~ (sn1 + lxn1 + n2bb)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.0, correlation = FALSE)
print(lm.1, correlation = FALSE)
print(lm.2, correlation = FALSE)

###############################
# WORD N+1
###############################
xdat <- em_n1

# LMMs REPORTED IN MAIN ANALYSIS:
lm.0 <- lmer(
  log(gzd) ~ (lxn1 + n2bb + n2ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
lm.1 <- lmer(
  log(ffd) ~ (lxn1 + n2bb + n2ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
lm.2 <- lmer(
  log(sfd) ~ (lxn1 + n2bb + n2ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.0, correlation = FALSE)
print(lm.1, correlation = FALSE)
print(lm.2, correlation = FALSE)

###############################
# WORD N+2
###############################
xdat <- em_n2

# LMMs REPORTED IN MAIN ANALYSIS:
lm.0 <- lmer(
  log(gzd) ~ (sn1 + lxn1 + n2bb + n2ab)^4 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
lm.1 <- lmer(
  log(ffd) ~ (sn1 + lxn1 + n2bb + n2ab)^4 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
lm.2 <- lmer(
  log(sfd) ~ (sn1 + lxn1 + n2bb + n2ab)^4 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.0, correlation = FALSE)
print(lm.1, correlation = FALSE)
print(lm.2, correlation = FALSE)

# FIGURE #2:
# Word N+2 GZD
##################################################################
# similar for SFD, not for FFD
xdat <- em_n2

# (1) get residuals of data without random effects (use remef.R):
# Same model specification as the word n+2 gaze-duration LMM above.
lm1 <- lmer(
  log(gzd) ~ (sn1 + lxn1 + n2bb + n2ab)^4 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)

# Work on the model frame so the rows match the observations used in the fit.
n2gzd <- lm1@frame

# The model is fit on log(gzd); exp() brings the remef() result back from
# the log scale.
n2gzd$gzd.adj <- exp(remef(lm1, fix = c(), ran = c("id", "wid", "sn")))

# AGGREGATION OF DATA:
# GZD ...
# descriptives/ effect sizes:
# Long format with one row per observation of the adjusted gaze duration.
n2gzd.rs <- melt(n2gzd,
  id.vars = c("id", "lxn1", "n2bb", "n2ab", "sn1"),
  measure.vars = c("gzd.adj"),
  na.rm = TRUE
)

# Turn the -0.5 / 0.5 contrast codes into labelled factors. The labels are
# assigned in sorted level order, i.e. -0.5 first, 0.5 second.
n2gzd.rs$lxn1 <- as.factor(n2gzd.rs$lxn1)
levels(n2gzd.rs$lxn1) <- c("FW", "CW")
n2gzd.rs$n2bb <- as.factor(n2gzd.rs$n2bb)
levels(n2gzd.rs$n2bb) <- c("easy", "diff.")
n2gzd.rs$n2ab <- as.factor(n2gzd.rs$n2ab)
levels(n2gzd.rs$n2ab) <- c("easy", "diff.")
n2gzd.rs$sn1 <- as.factor(n2gzd.rs$sn1)
levels(n2gzd.rs$sn1) <- c("fixated", "skipped")

# Cell means per (sn1, n2bb) row and n2ab column. Note that "SE" already
# holds 1.96 * standard error, i.e. the half-width of a 95% interval.
mPlot <- cast(n2gzd.rs, sn1 + n2bb ~ n2ab,
  subset = variable == "gzd.adj",
  fun.aggregate = function(x) {
    c(
      M = signif(mean(x), 3),
      SE = 1.96 * (sd(x) / (length(x)^.5))
    )
  }
)

# Columns 3/4 are easy_M / easy_SE and columns 5/6 are diff._M / diff._SE.
mPlot$fl1.lower <- mPlot[, 3] - mPlot[, 4]
mPlot$fl1.upper <- mPlot[, 3] + mPlot[, 4]
mPlot$fl2.lower <- mPlot[, 5] - mPlot[, 6]
mPlot$fl2.upper <- mPlot[, 5] + mPlot[, 6]

# After transposing: rows = n+2 target difficulty (easy, diff.),
# columns = the sn1 x n2bb cells in the row order of mPlot.
data <- t(as.matrix(mPlot[, c("easy_M", "diff._M")]))
lower <- t(as.matrix(mPlot[, c("fl1.lower", "fl2.lower")]))
upper <- t(as.matrix(mPlot[, c("fl1.upper", "fl2.upper")]))

# Open a new graphics device in a platform-independent way. This replaces
# the macOS-only quartz() call (and the commented-out windows() variant).
dev.new(width = 8, height = 5, pointsize = 11)
par(
  mfrow = c(1, 2), lwd = 2, cex.axis = 1.2, cex.lab = 1.2,
  mar = c(5, 4.2, 2.5, 2.5)
)

y.limits <- c(180, 320)

# ---- Left panel: columns 1:2 of `data` (titled "N+1: fixated") ----
plot(c(1, 2), data[1, 1:2],
  type = "b", lty = "solid",
  pch = 21,
  bg = "black",
  ylim = y.limits,
  xlim = c(0.8, 2.2),
  ylab = c("Gaze duration on word n+2 [ms]"),
  xlab = c("N+2 preview difficulty"),
  # , log="y"
  axes = FALSE
)
axis(side = 1, at = c(1, 2), labels = c("easy", "difficult"), tick = TRUE)
axis(side = 2)
errbar(c(1, 2), data[1, 1:2],
  yplus = upper[1, 1:2],
  yminus = lower[1, 1:2], add = TRUE, col = "black", lty = "solid",
  lwd = 1.5, xlab = c(), ylab = c()
)
errbar(c(1, 2), data[2, 1:2],
  yplus = upper[2, 1:2],
  yminus = lower[2, 1:2], add = TRUE, col = "black", lty = "solid",
  lwd = 1.5, xlab = c(), ylab = c()
)
# Redraw the lines and points on top of the error bars.
lines(c(1, 2), data[2, 1:2], type = "b", lty = "dashed", pch = 21, bg = "white")
lines(c(1, 2), data[1, 1:2], type = "b", lty = "solid", pch = 21, bg = "black")
box(lty = "solid")
title(c("N+1: fixated"))

legend(1.3, y.limits[2],
  legend = c("easy", "difficult"),
  title = c("N+2 target difficulty"),
  lty = c("solid", "dashed"),
  pch = c(21, 21),
  pt.bg = c("black", "white"),
  merge = FALSE,
  ncol = 1,
  # bty='n')
  bg = "white",
  box.col = "white"
)

# ---- Right panel: columns 3:4 of `data` (titled "N+1: skipped") ----
plot(c(1, 2), data[1, 3:4],
  type = "b", lty = "solid",
  pch = 21,
  bg = "black",
  ylim = y.limits,
  xlim = c(0.8, 2.2),
  ylab = c("Gaze duration on word n+2 [ms]"),
  xlab = c("N+2 preview difficulty"),
  # , log="y"
  axes = FALSE
)
axis(side = 1, at = c(1, 2), labels = c("easy", "difficult"), tick = TRUE)
axis(side = 2)
errbar(c(1, 2), data[1, 3:4],
  yplus = upper[1, 3:4],
  yminus = lower[1, 3:4], add = TRUE, col = "black", lty = "solid",
  lwd = 1.5, xlab = c(), ylab = c()
)
errbar(c(1, 2), data[2, 3:4],
  yplus = upper[2, 3:4],
  yminus = lower[2, 3:4], add = TRUE, col = "black", lty = "solid",
  lwd = 1.5, xlab = c(), ylab = c()
)
lines(c(1, 2), data[2, 3:4], type = "b", lty = "dashed", pch = 21, bg = "white")
lines(c(1, 2), data[1, 3:4], type = "b", lty = "solid", pch = 21, bg = "black")
box(lty = "solid")
title(c("N+1: skipped"))

#----------------------------------#
# NOT REPORTED ...
# SUPPLEMENTARY ANALYSES:
#----------------------------------#

#-------------------------------------------------------------#
# CHECK INFLUENCE OF LAST FIXATION LOCATION PRIOR TO BOUNDARY:
# does this affect preview effects of word n+2?
#-------------------------------------------------------------#

# Which gaze counts occur, and what percentage of rows has 4 or more?
unique(em_n0$ngz)
length(which(em_n0$ngz >= 4)) / nrow(em_n0) * 100
# only 0.11 % of cases (3 out of 2773) show 4 or more fixations on word n
# in word-based dataframe, the fixation position (letter l0,l1,l2,l3) is
# only stored for up to three fixations
# however, the error if we use l3 for the cases with 4 or more fixations
# is quite small...
# (and the results do not change significantly if we
# exclude the data >= 4 ngz)

# Row indices of em_n0 grouped by the number of gazes on word n.
xSingleFix <- which(em_n0$ngz == 1)
xTwoFix <- which(em_n0$ngz == 2)
xThreeFix <- which(em_n0$ngz == 3)
xMoreFix <- which(em_n0$ngz > 3)

# tmp = largest fixated letter position on word n, taken over the stored
# fixation letters (l0 for single fixations, l1..l3 otherwise).
em_n0$tmp <- NA
em_n0$tmp[xSingleFix] <- em_n0$l0[xSingleFix]
# FIX: the two-fixation case must compare the two fixated letters (l1, l2).
# The previous code used wl2, which is the centered 1/word-length of the
# word to the right and not a letter position.
em_n0$tmp[xTwoFix] <- pmax(em_n0$l1[xTwoFix], em_n0$l2[xTwoFix])
em_n0$tmp[xThreeFix] <- pmax(
  em_n0$l1[xThreeFix], em_n0$l2[xThreeFix], em_n0$l3[xThreeFix]
)
# Only three letter positions are stored, so cases with 4 or more gazes
# fall back to the same three columns.
em_n0$tmp[xMoreFix] <- pmax(
  em_n0$l1[xMoreFix], em_n0$l2[xMoreFix], em_n0$l3[xMoreFix]
)

# Corpus table; n2.dwds is presumably created by this load() - confirm.
load(ifile_corpus)

# Build a combined sentence/word key (100 * sn + wn) on both sides and look
# up each em_n0 row in the corpus. The key is unique only while wn < 100.
xSnWn_em <- 1e2 * em_n0$sn + em_n0$wn
xSnWn_cp <- 1e2 * n2.dwds$sn + n2.dwds$wn
xCorpusToEM <- match(xSnWn_em, xSnWn_cp)
# NOTE(review): n2.dwds$l is presumably the word length in letters - confirm.
em_n0$wll <- n2.dwds$l[xCorpusToEM]

# (1) LAST FIXATION POSITION PRIOR TO BOUNDARY:
# llbb: wll - tmp + 1
# clbb: llbb centered on its mean (not rescaled)
# flbb: median split of llbb (<= median: -0.5, otherwise 0.5)
em_n0$llbb <- em_n0$wll - em_n0$tmp + 1
em_n0$clbb <- scale(em_n0$llbb, scale = FALSE)
em_n0$flbb <- ifelse(em_n0$llbb <= median(em_n0$llbb, na.rm = TRUE), -0.5, 0.5)

# Copy the covariates to the n+1 and n+2 data frames.
# NOTE(review): this assumes em_n1 and em_n2 have the same rows in the same
# order as em_n0 - confirm.
em_n1$llbb <- em_n0$llbb
em_n1$clbb <- em_n0$clbb
em_n1$flbb <- em_n0$flbb

em_n2$llbb <- em_n0$llbb
em_n2$clbb <- em_n0$clbb
em_n2$flbb <- em_n0$flbb

# (2) SINGLE vs.
# MULTIPLE FIXATION CASES:
# ffc: -0.5 for a single gaze on word n, 0.5 otherwise.
em_n0$ffc <- ifelse(em_n0$ngz == 1, -0.5, 0.5)
# NOTE(review): as with flbb, this assumes em_n1 / em_n2 rows line up with
# em_n0 - confirm.
em_n1$ffc <- em_n0$ffc
em_n2$ffc <- em_n0$ffc

# All models below are fit with REML = FALSE. The objects lm0..lm5 are
# overwritten from one section to the next.

###############################
# WORD N
###############################
xdat <- em_n0

lm0 <- lmer(
  log(gzd) ~ (sn1 + lxn1 + n2bb)^2 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = FALSE
)
print(lm0, correlation = FALSE)

lm1 <- lmer(
  log(gzd) ~ flbb * (sn1 + lxn1 + n2bb)^2 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = FALSE
)
print(lm1, correlation = FALSE)

lm2 <- lmer(
  log(sfd) ~ flbb * (sn1 + lxn1 + n2bb)^2 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = FALSE
)
print(lm2, correlation = FALSE)

lm3 <- lmer(
  log(ffd) ~ flbb * (sn1 + lxn1 + n2bb)^2 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = FALSE
)
print(lm3, correlation = FALSE)

lm5 <- lmer(
  log(gzd) ~ (flbb * ffc) * (sn1 + lxn1 + n2bb)^2 +
    (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = FALSE
)
print(lm5, correlation = FALSE)

###############################
# WORD N+1
###############################
xdat <- em_n1

lm0 <- lmer(
  log(gzd) ~ (lxn1 + n2bb + n2ab)^2 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = FALSE
)
print(lm0, correlation = FALSE)

lm1 <- lmer(
  log(gzd) ~ flbb * (lxn1 + n2bb + n2ab)^2 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = FALSE
)
print(lm1, correlation = FALSE)
# Compare the models with and without the flbb terms.
anova(lm1, lm0)

lm2 <- lmer(
  log(sfd) ~ flbb * (lxn1 + n2bb + n2ab)^2 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = FALSE
)
print(lm2, correlation = FALSE)

lm3 <- lmer(
  log(ffd) ~ flbb * (lxn1 + n2bb + n2ab)^2 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = FALSE
)
print(lm3, correlation = FALSE)

lm5 <- lmer(
  log(gzd) ~ (flbb * ffc) * (lxn1 + n2bb + n2ab)^2 +
    (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = FALSE
)
print(lm5, correlation = FALSE)

###############################
# WORD N+2
###############################
xdat <- em_n2

lm0 <- lmer(
  log(gzd) ~ (sn1 + lxn1 + n2bb + n2ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = FALSE
)
print(lm0, correlation = FALSE)

lm1 <- lmer(
  log(gzd) ~ flbb * (sn1 + lxn1 + n2bb + n2ab)^3 +
    (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = FALSE
)
print(lm1, correlation = FALSE)
# Compare the models with and without the flbb terms.
anova(lm1, lm0)

lm2 <- lmer(
  log(sfd) ~ flbb * (sn1 + lxn1 + n2bb + n2ab)^3 +
    (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = FALSE
)
print(lm2, correlation = FALSE)

lm3 <- lmer(
  log(ffd) ~ flbb * (sn1 + lxn1 + n2bb + n2ab)^3 +
    (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = FALSE
)
print(lm3, correlation = FALSE)

lm5 <- lmer(
  log(gzd) ~ (flbb * ffc) * (sn1 + lxn1 + n2bb + n2ab)^3 +
    (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = FALSE
)
print(lm5, correlation = FALSE)

# PLOT AFTER REMEF
source("remef.R")

# lm3 here is the word n+2 first-fixation-duration model fitted just above.
dat <- lm3@frame
# `fix` lists every index in 1..30 except 22, so presumably only the 22nd
# fixed-effect coefficient is kept in the adjusted values - confirm against
# remef.R and the coefficient order of lm3.
dat$ffd.adj <- exp(remef(lm3,
  fix = c(1:21, 23:30),
  ran = c("sn", "wid", "id")
))

# descriptives/ effect sizes:
n2.rs <- melt(dat,
  id.vars = c("id", "lxn1", "n2bb", "n2ab", "flbb", "sn1"),
  measure.vars = c("ffd.adj"),
  na.rm = TRUE
)

# Cell means per (flbb, sn1) row and n2bb column. Note that "SE" already
# holds 1.96 * standard error, i.e. the half-width of a 95% interval.
mPlot <- cast(n2.rs, flbb + sn1 ~ n2bb,
  subset = variable == "ffd.adj",
  fun.aggregate = function(x) {
    c(
      M = signif(mean(x), 3),
      SE = 1.96 * (sd(x) / (length(x)^.5))
    )
  }
)

# Columns 3/4 are -0.5_M / -0.5_SE and columns 5/6 are 0.5_M / 0.5_SE.
mPlot$fl1.lower <- mPlot[, 3] - mPlot[, 4]
mPlot$fl1.upper <- mPlot[, 3] + mPlot[, 4]
mPlot$fl2.lower <- mPlot[, 5] - mPlot[, 6]
mPlot$fl2.upper <- mPlot[, 5] + mPlot[, 6]

# After transposing: rows = n2bb (-0.5, 0.5), columns = the flbb x sn1
# cells in the row order of mPlot.
data <- t(as.matrix(mPlot[, c("-0.5_M", "0.5_M")]))
lower <- t(as.matrix(mPlot[, c("fl1.lower", "fl2.lower")]))
upper <- t(as.matrix(mPlot[, c("fl1.upper", "fl2.upper")]))

# Open a new graphics device in a platform-independent way. This replaces
# the Windows-only windows() call.
dev.new(width = 8, height = 5, pointsize = 11)
par(
  mfrow = c(1, 2), lwd = 2, cex.axis = 1.2, cex.lab = 1.2,
  mar = c(5, 4.2, 2.5, 2.5)
)

# NOTE(review): the y-range is around 1 although the axis label says [ms];
# the adjusted values are presumably ratios rather than durations once the
# intercept is among the removed terms - confirm.

# ---- Left panel: columns 1:2 of `data` ----
# NOTE(review): columns 1:2 belong to the first flbb level in mPlot,
# presumably -0.5 (llbb <= median). Confirm that "far" is the right title
# for this panel and "near" for the other.
n1 <- 1:2
y.limits <- c(0.9, 1.3)
plot(c(1, 2), data[1, n1],
  type = "b", lty = "solid",
  pch = 21,
  bg = "black",
  ylim = y.limits,
  xlim = c(0.8, 2.2),
  ylab = c("First fixation duration on word n+2 [ms]"),
  xlab = c("N+1 skipping"),
  axes = FALSE
)
axis(side = 1, at = c(1, 2), labels = c("fixated", "skipped"), tick = TRUE)
axis(side = 2)
errbar(c(1, 2), data[1, n1],
  yplus = upper[1, n1],
  yminus = lower[1, n1], add = TRUE, col = "black", lty = "solid",
  lwd = 1.5
)
errbar(c(1, 2), data[2, n1],
  yplus = upper[2, n1],
  yminus = lower[2, n1], add = TRUE, col = "black", lty = "solid",
  lwd = 1.5
)
# Redraw the lines and points on top of the error bars.
lines(c(1, 2), data[2, n1], type = "b", lty = "dashed", pch = 21, bg = "white")
lines(c(1, 2), data[1, n1], type = "b", lty = "solid", pch = 21, bg = "black")
box(lty = "solid")
title(c("FixDist to boundary: far"))

legend(0.9, 1.3,
  legend = c("easy", "difficult"),
  title = c("N+2 preview difficulty"),
  lty = c("solid", "dashed"),
  pch = c(21, 21),
  pt.bg = c("black", "white"),
  merge = FALSE,
  ncol = 1,
  # bty='n')
  bg = "white",
  box.col = "white"
)

# ---- Right panel: columns 3:4 of `data` ----
n1 <- 3:4
y.limits <- c(0.8, 1.3)
plot(c(1, 2), data[1, n1],
  type = "b", lty = "solid",
  pch = 21,
  bg = "black",
  ylim = y.limits,
  xlim = c(0.8, 2.2),
  ylab = c("First fixation duration on word n+2 [ms]"),
  xlab = c("N+1 skipping"),
  axes = FALSE
)
axis(side = 1, at = c(1, 2), labels = c("fixated", "skipped"), tick = TRUE)
axis(side = 2)
errbar(c(1, 2), data[1, n1],
  yplus = upper[1, n1],
  yminus = lower[1, n1], add = TRUE, col = "black", lty = "solid",
  lwd = 1.5
)
errbar(c(1, 2), data[2, n1],
  yplus = upper[2, n1],
  yminus = lower[2, n1], add = TRUE, col = "black", lty = "solid",
  lwd = 1.5
)
lines(c(1, 2), data[2, n1], type = "b", lty = "dashed", pch = 21, bg = "white")
lines(c(1, 2), data[1, n1], type = "b", lty = "solid", pch = 21, bg = "black")
box(lty = "solid")
title(c("FixDist to boundary: near"))