# SUPPLEMENTARY ANALYSES for BoundaryN2-Experiments (Manuscript):
# EXP.1: n+2 frequency preview manipulation
# Sarah Risse, December 2011

# NOTE(review): clearing the workspace and switching to a hard-coded,
# user-specific directory make this script non-portable. Both calls are kept
# so that existing runs behave exactly as before.
rm(list = ls())

# SET WORKING DIRECTORY:
getwd()
setwd("/Users/Sarah/Documents/EM_Manuscripts/2011.Article.dPoF.N2/pmr2")
dir()

# INPUT:
# (1) data frame containing eye-movement (em) measures for different words:
#     em_nm1: word n-1
#     em_n0:  preboundary word n
#     em_n1:  postboundary word n+1
#     em_n2:  target word n+2
#     em_n3:  posttarget word n+3
ifile_em <- "n2FQ_em_filtered.rda"
# (2) data frame containing additional word information
#     based on the dlex-database http://dlexdb.de/:
ifile_info <- "n2FQ_sinfo.rda"
ifile_corpus <- "FQ.DWDS.wid.rda"

# OUTPUT:

# LIBRARIES:
library(lme4)
library(reshape)
library(MASS)
library(Hmisc)
library(ggplot2)

# FUNCTIONS:
source("remef.R")

# LOAD DATA:
load(ifile_em)
ls()
# em_n0, em_n1, em_n2, em_n3, em_nm1
str(em_n0)
#'data.frame': 7705 obs. of 30 variables:
# $ id  : subject identification
# $ sn  : sentence number
# $ wn  : current word number in sentence
# $ nw  : total number of words in sentence
# $ wid : unique word identification number across all sentences
#         (same word gets same number)
# $ cond: n+2 processing demand conditions
#         (1) HF-HF (2) LF-HF (3) HF-LF (4) LF-LF
# $ f2bb: log-frequency of word n+2 preview before boundary
# $ f2ab: log-frequency of word n+2 target after boundary
# $ n2bb: n+2 PREVIEW difficulty (-0.5: easy (HF); 0.5: difficult (LF))
# $ n2ab: n+2 TARGET difficulty (-0.5: easy (HF); 0.5: difficult (LF))
# $ pvn2: n+2 preview condition (-0.5: correct preview - no change;
#         0.5: incorrect preview - display change)
# $ sn1 : n+1 skipping status (-0.5: fixated; 0.5: skipped)
# $ sn2 : n+2 skipping status (-0.5: fixated; 0.5: skipped)
# $ lxn1: n+1 lexical status (-0.5: function word; 0.5: content word)
# $ f   : centered log-frequency of the fixated word
# $ wl  : centered 1/word length of the fixated word
# $ wl1 : centered wl for the word to the left (lag-effect)
# $ f1  : centered f for the word to the left (lag-effect)
# $ wl2 : centered wl for the word to the right (successor-effect)
# $ f2  : centered f for the word to the right (successor-effect)
# $ wl3 : centered wl for the word two words to the left
# $ f3  : centered f for the word two words to the left
# $ wl4 : centered wl for the word two words to the right
# $ f4  : centered f for the word two words to the right
# $ ffd : first fixation duration
# $ gzd : gaze duration
# $ sfd : single fixation duration
# $ tvt : total viewing time
# $ prx : probability of refixation
# $ psk : probability of skipping
# $ prg : probability of regression
# $ ilp : initial landing position (letter position/word length)

load(ifile_info)
str(sinfo)
#'data.frame': 160 obs. of 15 variables:
# $ sn        : sentence number
# $ wtf_hf_abs: absolute word trigram frequency for HF n+2
# $ wtf_lf_abs: absolute word trigram frequency for LF n+2
# $ wtf_hf_pm : word trigram frequency for HF n+2 per million
# $ wtf_lf_pm : word trigram frequency for LF n+2 per million
# $ wtp_hf    : probability of word trigram for HF n+2
# $ wtp_lf    : probability of word trigram for LF n+2
# $ f_hf_abs  : absolute word frequency for HF n+2
# $ f_lf_abs  : absolute word frequency for LF n+2
# $ f_hf_pm   : word frequency for HF n+2 per million
# $ f_lf_pm   : word frequency for LF n+2 per million
# $ ibf_hf_pm : initial bigram frequency for HF n+2 (per million)
# $ ibf_lf_pm : initial bigram frequency for LF n+2 (per million)
# $ itf_hf_pm : initial trigram frequency for HF n+2 (per million)
# $ itf_lf_pm : initial trigram frequency for LF n+2 (per million)

#------------------------------------#
# EXPERIMENT 1:
# YOUNG ADULTS (BOUNDARY-N2-FQ):
#------------------------------------#

#----------------------------------------#
# add new corpus information
# (e.g., initial bigram/trigram frequency)
#----------------------------------------#

# Add the corpus-based n+2 predictors to one eye-movement data frame.
#
# Sentence-level corpus values are looked up in `sinfo` via the sentence
# number `sn`. For every row, the HF or LF variant is then chosen from that
# row's own condition coding: `n2bb` for the preview (before boundary, "_bb")
# and `n2ab` for the target (after boundary, "_ab"); -0.5 selects the HF value,
# anything else the LF value. Doing the lookup per data frame keeps the values
# row-aligned even when the data frames differ in number or order of rows.
#
# Arguments:
#   em       data frame with columns sn, n2bb and n2ab
#   sinfo    data frame with column sn and the corpus columns listed below
#   keep_raw if TRUE, the matched raw sinfo columns are stored in `em` as well
# Value:
#   `em` with the added columns wtf_bb, wtf_ab, ibf_bb, ibf_ab, itf_bb,
#   itf_ab, f_bb, f_ab, wtp_bb, wtp_ab. The ibf, itf and f columns are
#   log10-transformed; the wtf and wtp columns are stored untransformed.
add_corpus_info <- function(em, sinfo, keep_raw = FALSE) {
  corpus_cols <- c(
    "wtf_hf_abs", "wtf_lf_abs", "wtf_hf_pm", "wtf_lf_pm",
    "wtp_hf", "wtp_lf",
    "f_hf_abs", "f_lf_abs", "f_hf_pm", "f_lf_pm",
    "ibf_hf_pm", "ibf_lf_pm", "itf_hf_pm", "itf_lf_pm"
  )
  stopifnot(
    all(c("sn", "n2bb", "n2ab") %in% names(em)),
    all(c("sn", corpus_cols) %in% names(sinfo))
  )

  idx <- match(em$sn, sinfo$sn)
  if (anyNA(idx)) {
    # Unmatched sentences yield NA predictors (as before); make that visible.
    warning(sum(is.na(idx)), " row(s) have no matching sentence in sinfo",
            call. = FALSE)
  }
  info <- sinfo[idx, corpus_cols, drop = FALSE]
  if (keep_raw) {
    em[, corpus_cols] <- info
  }

  preview_hf <- em$n2bb == -0.5
  target_hf <- em$n2ab == -0.5
  pick <- function(is_hf, hf_col, lf_col) {
    ifelse(is_hf, info[[hf_col]], info[[lf_col]])
  }

  # word trigram frequency (word n, n+1, n+2):
  em$wtf_bb <- pick(preview_hf, "wtf_hf_pm", "wtf_lf_pm")
  em$wtf_ab <- pick(target_hf, "wtf_hf_pm", "wtf_lf_pm")
  # initial bigram frequency of word n+2:
  em$ibf_bb <- log10(pick(preview_hf, "ibf_hf_pm", "ibf_lf_pm"))
  em$ibf_ab <- log10(pick(target_hf, "ibf_hf_pm", "ibf_lf_pm"))
  # initial trigram frequency of word n+2:
  em$itf_bb <- log10(pick(preview_hf, "itf_hf_pm", "itf_lf_pm"))
  em$itf_ab <- log10(pick(target_hf, "itf_hf_pm", "itf_lf_pm"))
  # (whole) word frequency of word n+2:
  em$f_bb <- log10(pick(preview_hf, "f_hf_pm", "f_lf_pm"))
  em$f_ab <- log10(pick(target_hf, "f_hf_pm", "f_lf_pm"))
  # conditional probability of word trigram (word n, n+1, n+2):
  em$wtp_bb <- pick(preview_hf, "wtp_hf", "wtp_lf")
  em$wtp_ab <- pick(target_hf, "wtp_hf", "wtp_lf")

  em
}

# Only em_n2 additionally keeps the raw sinfo columns.
em_n2 <- add_corpus_info(em_n2, sinfo, keep_raw = TRUE)
em_n1 <- add_corpus_info(em_n1, sinfo)
em_n0 <- add_corpus_info(em_n0, sinfo)

# TEST CORRELATION BETWEEN PREDICTORS:
# see manuscript p. 11
# (1) n2bb vs. word trigram predictability:
cor(em_n0$n2bb, em_n0$wtp_bb)
# r = -0.06; NOTE: correlations negative because n2bb is coded positive
# [-0.5: easy; 0.5: difficult] and continuous predictors are always coded
# negative [small values (e.g., low pred): difficult;
# high values (e.g., high freq): easy]
# (2) n2bb vs. initial bigram frequency:
cor(em_n0$n2bb, em_n0$ibf_bb)
# r = -0.45
# (3) n2bb vs. initial trigram frequency:
cor(em_n0$n2bb, em_n0$itf_bb)
# r = -0.72
# (4) ibf vs. initial trigram frequency:
cor(em_n0$ibf_bb, em_n0$itf_bb)
# r = 0.65
# (5) f_bb vs. initial bigram frequency:
cor(em_n0$f_bb, em_n0$ibf_bb)
# r = 0.45
# (6) f_bb vs. initial trigram frequency:
cor(em_n0$f_bb, em_n0$itf_bb)
# r = 0.77

#----------------------------------#
# MAIN ANALYSES:
# for word n, n+1, and n+2
#----------------------------------#

###############################
# WORD N
###############################
xdat <- em_n0

# LMMs REPORTED IN MAIN ANALYSIS:
lm.0 <- lmer(
  log(gzd) ~ (sn1 + lxn1 + n2bb)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
lm.1 <- lmer(
  log(ffd) ~ (sn1 + lxn1 + n2bb)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
lm.2 <- lmer(
  log(sfd) ~ (sn1 + lxn1 + n2bb)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)

print(lm.0, correlation = FALSE)
print(lm.1, correlation = FALSE)
print(lm.2, correlation = FALSE)

###############################
# WORD N+1
###############################
xdat <- em_n1

# LMMs REPORTED IN MAIN ANALYSIS:
lm.0 <- lmer(
  log(gzd) ~ (lxn1 + n2bb + n2ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
lm.1 <- lmer(
  log(ffd) ~ (lxn1 + n2bb + n2ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
lm.2 <- lmer(
  log(sfd) ~ (lxn1 + n2bb + n2ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)

print(lm.0, correlation = FALSE)
print(lm.1, correlation = FALSE)
print(lm.2, correlation = FALSE)

###############################
# WORD N+2
###############################
xdat <- em_n2

# LMMs REPORTED IN MAIN ANALYSIS:
lm.0 <- lmer(
  log(gzd) ~ (sn1 + lxn1 + n2bb + n2ab)^4 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
lm.1 <- lmer(
  log(ffd) ~ (sn1 + lxn1 + n2bb + n2ab)^4 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
lm.2 <- lmer(
  log(sfd) ~ (sn1 + lxn1 + n2bb + n2ab)^4 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)

print(lm.0, correlation = FALSE)
print(lm.1, correlation = FALSE)
print(lm.2, correlation = FALSE)

#----------------------------------#
# SUPPLEMENTARY ANALYSES:
# for word n+1
#----------------------------------#
# check remaining lexical frequency effect of n+2 preview in residuals after
# accounting for all variance associated with sublexical bigram or trigram
# frequency ...
# NOTE(review): the residual regressions below pair residuals of the
# bigram/trigram model with n2bb taken from the model frame of lm.org; this
# presumes both models were fitted to the same rows - confirm that no rows are
# dropped differently (e.g., through missing predictor values).

###############################
# WORD N+1
###############################
xdat <- em_n1

# COMPLETE MODEL WITH ALL PREDICTORS:
#------------------------
# GD
#------------------------
lm.org <- lmer(
  log(gzd) ~ (lxn1 + n2bb + n2ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.org, correlation = FALSE)

lm.bigram <- lmer(
  log(gzd) ~ (lxn1 + ibf_bb + ibf_ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.bigram, correlation = FALSE)

lm.trigram <- lmer(
  log(gzd) ~ (lxn1 + itf_bb + itf_ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.trigram, correlation = FALSE)

bigram.res <- resid(lm.bigram)
trigram.res <- resid(lm.trigram)

summary(lm(bigram.res ~ lm.org@frame$n2bb))
summary(lm(trigram.res ~ lm.org@frame$n2bb))
#summary(lm(bigram.res ~ lm.org@frame$n2ab))
#summary(lm(bigram.res ~ lm.org@frame$n2bb*lm.org@frame$n2ab))
#summary(lm(trigram.res ~ lm.org@frame$n2ab))
#summary(lm(trigram.res ~ lm.org@frame$n2bb+lm.org@frame$n2ab))

#------------------------
# FFD
#------------------------
lm.org <- lmer(
  log(ffd) ~ (lxn1 + n2bb + n2ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.org, correlation = FALSE)

lm.bigram <- lmer(
  log(ffd) ~ (lxn1 + ibf_bb + ibf_ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.bigram, correlation = FALSE)

lm.trigram <- lmer(
  log(ffd) ~ (lxn1 + itf_bb + itf_ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.trigram, correlation = FALSE)

bigram.res <- resid(lm.bigram)
trigram.res <- resid(lm.trigram)

summary(lm(bigram.res ~ lm.org@frame$n2bb))
summary(lm(trigram.res ~ lm.org@frame$n2bb))
#summary(lm(bigram.res ~ lm.org@frame$n2ab))
#summary(lm(bigram.res ~ lm.org@frame$n2bb*lm.org@frame$n2ab))
#summary(lm(trigram.res ~ lm.org@frame$n2ab))
#summary(lm(trigram.res ~ lm.org@frame$n2bb+lm.org@frame$n2ab))

#------------------------
# SFD
#------------------------
lm.org <- lmer(
  log(sfd) ~ (lxn1 + n2bb + n2ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.org, correlation = FALSE)

lm.bigram <- lmer(
  log(sfd) ~ (lxn1 + ibf_bb + ibf_ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.bigram, correlation = FALSE)

lm.trigram <- lmer(
  log(sfd) ~ (lxn1 + itf_bb + itf_ab)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.trigram, correlation = FALSE)

bigram.res <- resid(lm.bigram)
trigram.res <- resid(lm.trigram)

summary(lm(bigram.res ~ lm.org@frame$n2bb))
summary(lm(trigram.res ~ lm.org@frame$n2bb))
#summary(lm(bigram.res ~ lm.org@frame$n2ab))
#summary(lm(bigram.res ~ lm.org@frame$n2bb*lm.org@frame$n2ab))
#summary(lm(trigram.res ~ lm.org@frame$n2ab))
#summary(lm(trigram.res ~ lm.org@frame$n2bb+lm.org@frame$n2ab))

#----------------------------------#
# NOT REPORTED:
# ADDITIONAL ANALYSES
# for word n and n+2
#----------------------------------#

###############################
# WORD N
###############################
xdat <- em_n0

# COMPLETE MODEL WITH ALL PREDICTORS:
lm.org <- lmer(
  log(gzd) ~ (sn1 + lxn1 + n2bb)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.org, correlation = FALSE)

lm.bigram <- lmer(
  log(gzd) ~ (sn1 + lxn1 + ibf_bb)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.bigram, correlation = FALSE)
bigram.res <- resid(lm.bigram)
summary(lm(bigram.res ~ lm.org@frame$n2bb))

lm.trigram <- lmer(
  log(gzd) ~ (sn1 + lxn1 + itf_bb)^3 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.trigram, correlation = FALSE)
trigram.res <- resid(lm.trigram)
summary(lm(trigram.res ~ lm.org@frame$n2bb))

###############################
# WORD N+2
###############################
xdat <- em_n2

# COMPLETE MODEL WITH ALL PREDICTORS:
lm.org <- lmer(
  log(gzd) ~ (sn1 + lxn1 + n2bb + n2ab)^4 + (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.org, correlation = FALSE)

lm.bigram <- lmer(
  log(gzd) ~ (sn1 + lxn1 + ibf_bb + ibf_ab)^4 +
    (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.bigram, correlation = FALSE)
bigram.res <- resid(lm.bigram)
summary(lm(bigram.res ~ lm.org@frame$n2bb))
summary(lm(bigram.res ~ lm.org@frame$n2ab))
summary(lm(bigram.res ~ lm.org@frame$n2bb * lm.org@frame$n2ab))

lm.trigram <- lmer(
  log(gzd) ~ (sn1 + lxn1 + itf_bb + itf_ab)^4 +
    (1 | id) + (1 | wid) + (1 | sn),
  data = xdat, REML = TRUE
)
print(lm.trigram, correlation = FALSE)
trigram.res <- resid(lm.trigram)
summary(lm(trigram.res ~ lm.org@frame$n2bb))
summary(lm(trigram.res ~ lm.org@frame$n2ab))
summary(lm(trigram.res ~ lm.org@frame$n2bb * lm.org@frame$n2ab))