##### Potsdam Mind Research Repository ###### ## This script is part of the publication package for the article: # Yan, Zhou, Shu, Yusupu, Miao, Kruegel, & Kliegl, R. (2014). # Eye Movements Guided by Morphological Structure: # Evidence from the Uighur Language Cognition, 132(2), 181-215. # URL: http://www.sciencedirect.com/science/article/pii/S001002771400047X #--------------------------------------------------------------------------------------------------------------- # Revision History #--------------------------------------------------------------------------------------------------------------- # 29 Apr 2012, r. kliegl # 30 Apr 2012, m. yan # 12 May 2012, r. kliegl # 07 Jun 2012, m. yan # 21 Jun 2012, r. kliegl -- add launch site # 22 Jun 2012, r. kliegl -- start over # 24 Jun 2012, r. kliegl -- version 2, constrain to launch site: 1 <= ls <= 10, take out root length from main analysis # 24 Jun 2012, m. yan -- generate new data file "U1_data_0624.txt", correcting various errors # 24-25 Jun 2012, r. kliegl -- version 3, new data corrected rl; no more zero-values # 26 Jun 2012, r. kliegl -- version 4, corrected log(wf) to log10(wf) # 27 Jun 2012, r. kliegl -- outlier removal; decided against removal # 02 Jul 2012, r. kliegl -- version 5, use log2 wl; center all covariates at zero; decided for removal of outliers # 10 Jul 2012, r. kliegl -- include fixations on space before words, "NEW STUFF" # 21-24 Sep 2012, m. yan -- final check and reorganize, correcting various errors # 04 May 2013, m. yan -- add unique word id and unique root morpheme id # 12 Sep 2013, m. yan -- add length x suffix random slope # 21 Feb 2014, m. yan -- add initial bigram frequency # The following scripts are tested under Window XP SP3, R 3.02 and ggplot2 0.9.3.1 # 22 Mar 2014, m. yan -- Final check for package # Start over with Uighur_package.v2.R # 18 Apr 2014. r. kliegl -- adapt to new lme4 (1.1-7) and R 3.0.3; use only REML=FALSE #--------------------------------------------------------------------------------------------------------------- # VARIABLES #--------------------------------------------------------------------------------------------------------------- # nsub subject id # nsen sentence id # wn word number in a sentence # nw number of words in a sentence # flp first-fixation landing position # ffd first-fixation duration # sfd second-fixation duration # gd gaze duration # ifre if a word fixated 1 or not 0 # ifskip if a word skipped 1 or not 0 # wl word length in letters # ls launch site # wf log10 word frequency # rl the number of root letters/root length # sn the number of suffixes # suftype1 type of 1st suffix: 0 no suffix, 1 derivational suffix, 2 inflectional suffix # suftype2 type of 2nd suffix #--------------------------------------------------------------------------------------------------------------- # Exp 1 #--------------------------------------------------------------------------------------------------------------- library(ggplot2) library(reshape2) library(plyr) library(scales) library(grid) library(lme4) #library(psych) rm(list=ls()) user='rk' # Local functions source("remef.v0.6.10.r") # Plot function theme_set(theme_bw(base_size=10)) stat_sum_single <- function(fun, geom="point", ...) { stat_summary(fun.y=fun, colour="red", geom=geom, size = 3, ...) } vplayout <- function(x, y) { viewport(layout.pos.row = x, layout.pos.col = y) } datraw <- read.table("U1_data_0624.txt",head=TRUE) # New data on 24 June 2012 # datraw: N=26442 #--------------------------------------------------------------------------------------------------------------- # SETUP #--------------------------------------------------------------------------------------------------------------- datraw$ls <- (datraw$sac_in - datraw$flp)/11.3 datraw$ls[which(datraw$wn==1)] = NA # Ming 2012Sep21, first words should have no ls datraw$flp <- datraw$flp/11.3 # FLP change pixel into letters datraw$flp.r <- datraw$flp/datraw$wl-0.5 # relative FLP, centered on word center datraw$slp <- datraw$slp/11.3 # second fixation landing position in letters datraw$wf10 <- log10(datraw$wf) # keep original frequency, generate log10 frequency datraw$skp <- ifelse(datraw$nfix==0, 1, 0) datraw$rfx <- ifelse(datraw$nfix>=2, 1, 0) # Load unique word id and unique root id, 04 May 2013 widinfo <- read.table("wid.txt",head=TRUE) idx = 0 for (i in 1:nrow(datraw)) { idx[i] = which(widinfo$nsen == datraw$nsen[i] & widinfo$wn == datraw$wn[i]) } datraw$nsub <- factor(datraw$nsub) datraw$nsen <- factor(datraw$nsen) # add unique word id and unique root id datraw$wid <- factor(widinfo$wid[idx]) datraw$rmid <- factor(widinfo$rmid[idx]) datraw$bigram <- widinfo$bigram[idx] #--------------------------------------------------------------------------------------------------------------- # SELECTION #--------------------------------------------------------------------------------------------------------------- # Skipped words idx0 <- which(datraw$skp == 1) # N=3758 N_fix <- nrow(datraw) - length(idx0) # number of fixated words: N=22684 dat0 <- datraw[ -idx0, ] sum(dat0$nfix) # number of fiations: N=34661 # Fixations on first words / last word / hyphenated words idx1 <- which(dat0$wn==1 | dat0$nw==dat0$wn | dat0$ifcon == 1 | dat0$fst_lst==1) # N=5712, 25% of N_fix # Fixations following negative launch site; that is a regression (also intraword) idx2 <- which(dat0$ls<0 & dat0$fst_lst==0) # N=122; 0.5% # Short and long fixations idx3 <- which(dat0$ffd < 60 | dat0$ffd > 600 | dat0$gd > 800) # N=1297; 6% # Filter 1 dat1 <- droplevels(dat0[-unique(c(idx1, idx2, idx3)), ]) nrow(dat1)/N_fix # words with first-fixation landing positions < 0 (i.e., FLPs on the space before the word) idx4 <- which(dat1$flp < 0) # 1785 # words with launch site from the last letter of the last word idx5 <- which(dat1$ls < 1) # 304 # Fixations with ls from more than 10 letter before the word idx6 <- which(dat1$ls > 10) # 1105 # Filter 2 dat2 <- droplevels(dat1[-unique(c(idx4, idx5, idx6)), ]) # datraw: N=26442 # dat0: N=22684 # dat1: N=16345 -> 16330 # dat2: N=13529 -> 13523 # relative to datraw: 51%, dat0: 60%; dat1: 83% #--------------------------------------------------------------------------------------------------------------- # FLP Model: log2 Word length, launch site, morphological complexity, word frequency #--------------------------------------------------------------------------------------------------------------- # Center covariates (use wl and sn specification motivated by Exp 2) dat2$wl.c <- log2(dat2$wl) - mean(log2(dat2$wl)) # use log2 2=1, 4=2, 8=3, 16=4 dat2$ls.c <- dat2$ls - mean(dat2$ls) dat2$sn.c <- dat2$sn - mean(dat2$sn) dat2$wf.c <- dat2$wf10 - mean(dat2$wf10) # use log10 of word frequency dat2$bigram.c <- log10(dat2$bigram+1) - mean(log10(dat2$bigram+1)) dat3 <- dat2 # Estimate main variance compenents and correlation parameters for subjects, intercept for sentences print(summary(flp_4.a.wid <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c + (1+wl.c+ls.c+sn.c+wf.c+wl.c*sn.c|nsub) + (1|nsen) + (1|wid), data=dat3, REML=FALSE)), corr=FALSE) # Center log10-transformed initial bigram frequency print(summary(flp_4.b.wid <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c * bigram.c + (1+wl.c+ls.c+sn.c+wf.c+wl.c*sn.c|nsub) + (1|nsen) + (1|wid), data=dat3, REML=FALSE)), corr=FALSE) anova(flp_4.a.wid, flp_4.b.wid) # START 18 Apr 2014 RK # (A) Model flp_4.b.wid probably not optimal: # (1) does not converge correctly w/ lme4 (1.1-7), # (2) no effects associated w/ word frequency (wf), # (3) smallest variance component for wf, # (4) model is degenerate; see: VarCorr(flp_4.b.wid) chf <- matrix(0, nrow=6, ncol=6) (chf[lower.tri(chf, diag=TRUE)] <- getME(flp_4.b.wid, "theta")[3:23]) sv <- svd(chf) round(sv$d^2/sum(sv$d^2)*100) # (B) Leave out word frequency print(summary(flp_4.c.wid <- lmer(flp ~ wl.c * ls.c * sn.c * bigram.c + (1+wl.c+ls.c+sn.c+wl.c:sn.c|nsub) + (1|nsen) + (1|wid), data=dat3, REML=FALSE)), corr=FALSE) anova(flp_4.a.wid, flp_4.b.wid, flp_4.c.wid) # (1) no warnings about convergence, # (2) no loss in goodness in fit, # (3) vc's are significant in drop1-LRTs, but model is still degenerate # (at most three vc's are supported by data); not sure what do to. See: VarCorr(flp_4.c.wid) chf <- matrix(0, nrow=5, ncol=5) (chf[lower.tri(chf, diag=TRUE)] <- getME(flp_4.c.wid, "theta")[3:17]) sv <- svd(chf) round(sv$d^2/sum(sv$d^2)*100) # (C) Leave out word frequency and bigram frequency print(summary(flp_4.d.wid <- lmer(flp ~ wl.c * ls.c * sn.c + (1+wl.c+ls.c+sn.c+wl.c:sn.c|nsub) + (1|nsen) + (1|wid), data=dat3, REML=FALSE)), corr=FALSE) anova(flp_4.a.wid, flp_4.d.wid) # (1) no warnings about convergence, # (2) no loss in goodness in fit, # (3) vc's are significant in drop1-LRTs, but model is still degenerate # (at most three vc's are supported by data); still not sure what do to. See: VarCorr(flp_4.d.wid) chf <- matrix(0, nrow=5, ncol=5) (chf[lower.tri(chf, diag=TRUE)] <- getME(flp_4.d.wid, "theta")[3:17]) sv <- svd(chf) round(sv$d^2/sum(sv$d^2)*100) # In the paper flp_4.a.wid was used for figures; now I would propose to use flp_4.d.wid. Keeping # variance components estimated as zero is probably no good idea, even if the vc is theoretically motivated. # ANOTHER ISSUE # Residuals of LMM; 4 outliers identified in WID model lattice::qqmath(resid(flp_4.c.wid )) iout <- which(resid(flp_4.c.wid ) > 6) # min was -5.96 resid(flp_4.c.wid)[iout] dat3b <- dat3[-iout, ] # Re-estimate last model w/o outliers and non-significant fixed-effect interaction terms print(summary(temp <- lmer(flp ~ (wl.c + ls.c + sn.c + bigram.c)^2 + wl.c:sn.c:bigram.c + (1+wl.c+ls.c+sn.c+wl.c:sn.c|nsub) + (1|nsen) + (1|wid), data=dat3b, REML=FALSE)), corr=FALSE) # Hm. Why do convergence problems depend on removing 2 to 6 outliers? qplot(x=fitted(temp), y=resid(temp), geom="point", shape=I("."), xlab="Fitted values", ylab="Standardized residuals") + geom_hline(yintercept=0) + theme_bw() # + geom_hex() + geom_density2d(size=1) # END 18 APRIL 2014 #--------------------------------------------------------------------------------------------------------------- # Visualization, FLP #--------------------------------------------------------------------------------------------------------------- flp_2.a = flp_4.a.wid matrix(names(fixef(flp_2.a))) #------------------------------------------------- # Description, Table 2 in paper dcast(dat2, value.var = "flp", wl ~ sn, length) ddply(dat2, .(wl, sn), summarize, M=mean(flp), SE=sd(flp)/sqrt(length(flp)), N=length(flp)) # ... check frequencies items <- unique(interaction(dat2$nsen, dat2$wn)) dat.items <- dat2[items, c("nsen", "wn", "wf", "wl", "sn")] dat.items <- dat.items[order(dat.items$wf), ] table(cut(dat2$wf, breaks=c(0, 1, 10, 10^2, 10^3, 10^4, 10^5))) #------------------------------------------------- # Factor variations # ... word length dat2$wl.f <- factor(dat2$wl) dat2$wl.f.log2 <- factor(round(log2(dat2$wl))) dat2$wl.f2 <- factor(ifelse(dat2$wl>=8, 'long', 'short'), levels=c("short", "long"), labels=c("short (2-7)", "long (8-17)")) dat2$wl.f3 <- factor(ifelse(dat2$wl>=10, 'long', ifelse(dat2$wl>=6 & dat2$wl<=9, 'medium', 'short'))) dat2$wl.f3 <- factor(dat2$wl.f3, levels=c("short", "medium", "long"), labels=c("short (2-5)", "medium (6-9)", "long (10-17)")) dat2$wl.f4 <- factor(ifelse(dat2$wl <=5, 'short', ifelse(dat2$wl >= 10, 'long', ifelse(dat2$wl >= 6 & dat2$wl <= 7, 'medium1', 'medium2')))) dat2$wl.f4 <- factor(dat2$wl.f4, levels=c("short", "medium1", "medium2", "long"), labels=c("short (2-5)", "medium (6-7)", "medium (8-9)", "long (10-17)")) # ... ... check dcast(dat2, value.var="flp", wl ~ wl.f4, length) #------------------------------------------------- # ... launch site dat2$ls.f <- round(dat2$ls) dat2$ls.f2 <- factor(ifelse(dat2$ls <= 4, "close", "far"), levels=c("close", "far")) # ... ... check dcast(dat2, value.var="flp", wl.f4 ~ ls.f2, mean) #------------------------------------------------- # ... mophological complexity dat2$sn.f <- factor(dat2$sn) dat2$sn.f2 <- factor(ifelse(dat2$sn == 0, 0, 1), labels=c("-", "+")) # suffix no/yes dat2$sn.f3 <- factor(ifelse(dat2$sn >= 3, 3, ifelse(dat2$sn == 0, 1, 2)), labels=c("0", "1-2", "3-5")) dat2$sn.4 <- ifelse(dat2$sn >= 3, 3, dat2$sn) dat2$sn.f4 <- factor(dat2$sn.4, labels=c("0", "1", "2", "3-5")) # ... ... suffixes WITHIN short, medium, and long words (wl.f3) fl3 <- levels(dat2$wl.f3) dat2$sn_wl3 <- dat2$sn dat2$sn_wl3[dat2$wl.f3 == fl3[2] & dat2$sn >= 2] <- 2 dat2$sn_wl3[dat2$wl.f3 == fl3[3] & dat2$sn == 0] <- 1 dat2$sn_wl3[dat2$wl.f3 == fl3[3] & dat2$sn >= 3] <- 3 dat2$sn_wl3 <- factor(dat2$sn_wl3) # ... ... ... check table(dat2$wl.f3, dat2$sn) table(dat2$wl.f3, dat2$sn_wl3) # ... ... suffixes WITHIN short, medium1, medium2, and long words (wl.f4) fl4 <- levels(dat2$wl.f4) dat2$sn_wl4 <- dat2$sn dat2$sn_wl4[dat2$wl.f4 == fl4[2] & dat2$sn >= 2] <- 2 dat2$sn_wl4[dat2$wl.f4 == fl4[3] & dat2$sn >= 2] <- 2 dat2$sn_wl4[dat2$wl.f4 == fl4[4] & dat2$sn == 0] <- 1 dat2$sn_wl4[dat2$wl.f4 == fl4[4] & dat2$sn >= 3] <- 3 dat2$sn_wl4 <- factor(dat2$sn_wl4) # ... ... ... check table(dat2$wl.f4, dat2$sn) table(dat2$wl.f4, dat2$sn_wl4) #------------------------------------------------- # ... word frequency dat2$wf.f <- round(dat2$wf10) dat2$wf.f2 <- factor(ifelse(dat2$wf.c <= 0, "low", "high"), levels=c("high", "low")) dat2$wf.f3 <- factor(ifelse(dat2$wf.f <= 2, "low", ifelse(dat2$wf.f == 3, "medium", "high")), levels=c("low", "medium", "high")) # ... suffix length (sl) = word length - root length dat2$sl <- dat2$wl - dat2$rl dat2$sl.f2 <- factor(ifelse(dat2$sl <= 7, "simple", "complex")) dat2$sl.f3 <- factor(ifelse(dat2$sl >= 8, 3, ifelse(dat2$sl == 0, 1, 2)), labels=c("0", "1-7", "8-10")) #------------------------------------------------- # Main effects # WL dat2$DV.wl <- remef(flp_2.a, keep=TRUE, grouping=TRUE, fix=c(1, "wl.c"), ran=NULL) ddply(dat2, .(round(log2(wl))), summarise, M=mean(flp), SD=sd(flp), SE=sd(flp)/sqrt(length(flp)), N=length(flp)) ddply(dat2, .(round(log2(wl))), summarise, M=mean(DV.wl), SD=sd(DV.wl), SE=sd(DV.wl)/sqrt(length(DV.wl)), N=length(DV.wl)) # LS dat2$DV.ls <- remef(flp_2.a, keep=TRUE, grouping=TRUE, fix=c(1, "ls.c"), ran=NULL) # SN -- shows increase in observed means, decrease in estimated means dat2$DV.sn <- remef(flp_2.a, keep=TRUE, grouping=TRUE, fix=c(1, "sn.c"), ran=NULL) ddply(dat2, .(sn), summarise, M_obs=mean(flp), M_est=mean(DV.sn)) # WF -- shows decrease in observed means, increase in estimated means (for 2-4) dat2$DV.wf <- remef(flp_2.a, keep=TRUE, grouping=TRUE, fix=c(1, "wf.c"), ran=NULL) ddply(dat2, .(wf.f), summarise, M_obs=mean(flp), M_est=mean(DV.sn), N=length(flp)) # Significant interaction # WL x LS dat2$DV.wl_ls <- remef(flp_2.a, keep=TRUE, grouping=TRUE, fix=c(1, "wl.c", "ls.c", "wl.c:ls.c"), ran=NULL) # WL x SN dat2$DV.wl_sn <- remef(flp_2.a, keep=TRUE, grouping=TRUE, fix=c(1, "wl.c", "sn.c", "wl.c:sn.c"), ran=NULL) #------------------------------------------------- # Plots # add linetype and shape ybreaks = seq(1, 7, 2) #------------------------------------------------- # WL x LS plot, Figure 2 ddply(dat2, .(wl.f4, ls.f), summarise, Obs = mean(flp), Est = mean(DV.wl_ls), N=length(flp) ) p1 <- qplot(data=dat2, x = ls, y = DV.wl_ls, xlab="Launch site", ylab="Fixation landing position", group=wl.f4, linetype=wl.f4, shape=wl.f4, geom="smooth", method="lm") + geom_smooth(method=lm,color="black") + scale_colour_manual(name = "Word length", values = c(1,2,3,4)) + scale_linetype_manual(name = "Word length", values = c(1,2,3,4)) + scale_shape_manual(name = "Word length", values = c(1,2,3,4)) + stat_summary (fun.y = mean, geom="point", size=2, mapping = aes (y=flp, x=ls.f, group=wl.f4) ) + coord_cartesian(ylim=c(0, 7)) + theme_bw() + theme(legend.justification= c(1,1), legend.position=c(1,1) ) pdf ("Fig2.pdf", paper="a4") p1 dev.off () #------------------------------------------------- # SN plot, Figure 3 ddply(dat2, .(sn), summarize, N=length(flp)) # not many fixations with sn > 3 ddply(dat2, .(sn.f4), summarize, N=length(flp)) # collaps sn 3 to 5 # ... aggregate to subject x sn table sn_obs <- ddply(dat2, .(nsub, sn.f4), summarize, M=mean(flp)) sn_GM <- mean(sn_obs$M) # ... remove between-subject variance, add GM back to data sn_obs2 <- ddply(sn_obs, .(nsub), transform, M.w = M - mean(M) + sn_GM) # ... compute M and within-SEs after removal of between-subject variance sn_obs3 <- ddply(sn_obs2, .(sn.f4), summarize, M = mean(M), SE=sd(M.w)/sqrt(length(M.w)), N=length(M.w)) # ... LMM estimates sn_est <- ddply(dat2, .(sn.f4), summarize, M=mean(DV.sn), SE=sd(DV.sn)/sqrt(length(DV.sn)), N=length(DV.sn)) # ... combine obs and est sn_Means <- rbind(sn_obs3, sn_est) sn_Means$Type <- factor(c(rep("observed", 4), rep("estimated", 4)), levels=c("observed", "estimated" )) # ... plot p2 <- qplot(data=sn_Means, x=sn.f4, y=M, xlab= "Number of suffixes", ylab="Fixation landing position", group=Type, linetype=Type, shape=Type, geom=c("point")) + geom_smooth(method=lm,color="black", se=FALSE) + geom_errorbar(aes(ymax=M+2*SE, ymin=M-2*SE), width=0.1, linetype=1) + coord_cartesian(ylim=c(2, 5)) + theme_bw() + theme(legend.title=element_blank(), legend.justification= c(0,1), legend.position=c(0,1)) pdf ("Fig3.pdf", paper="a4") p2 dev.off () #--------------------------------------------------------------------------------------------------------------- # Exp 2 #--------------------------------------------------------------------------------------------------------------- library(ggplot2) library(reshape2) library(plyr) library(scales) library(grid) library(lme4) rm(list=ls()) source("remef.v0.6.10.r") # -------------------------------------------------------------------------------------------------------- # Plot function theme_set(theme_bw(base_size=10)) stat_sum_single <- function(fun, geom="point", ...) { stat_summary(fun.y=fun, colour="red", geom=geom, size = 3, ...) } vplayout <- function(x, y) { viewport(layout.pos.row = x, layout.pos.col = y) } datraw <- read.table("U2_data_0629.txt", head=TRUE, na.strings="-999") datraw <- datraw[which(datraw$nsen!=37 & datraw$nsen!=84),] # single-suffixed words datraw$nsub <- factor(datraw$nsub) datraw$nsen <- factor(datraw$nsen) datraw$skp <- ifelse(datraw$nfix==0,1,0) datraw$rfx <- ifelse(datraw$nfix>=2,1,0) datraw$ls <- (datraw$sac_in - datraw$flp)/10.35 # Needs to computed before conversion datraw$flp <- datraw$flp/10.35 # change pixel into letters datraw$slp <- datraw$slp/10.35 # change pixel into letters datraw$wf10 <- log10(datraw$wf) # keep original frequency, generate log10 frequency # -------------------------------------------------------------------------------------------------------- # Filters twfix <- datraw[datraw$wn==3 & datraw$ffd >= 0, ] length(which(twfix$ffd < 60)) length(which(twfix$ffd > 600)) length(which(twfix$gd > 800)) length(which(twfix$flp < 0)) length(which(twfix$ls < 1)) length(which(twfix$ls > 10)) # Selection for target-word analysis dat.N <- droplevels(subset(datraw, wn == 3 & ffd >= 60 & ffd <= 600 & gd <= 800 & flp >= 0 & ls >= 1 & ls <= 10)) # Centering dat.N$wl.c <- dat.N$wl - 7.5 dat.N$sn.c <- dat.N$sn - 1.0 dat.N$ls.c <- dat.N$ls - mean(dat.N$ls) dat.N$wf.c <- dat.N$wf10 - mean(dat.N$wf10) # Use centered log10 frequency in LMM dat.N$rl.c <- dat.N$rl - mean(dat.N$rl) dat.N$flp.c <- dat.N$flp - mean(dat.N$flp) # ... when used as covariate # Factor definitions (mostly for plotting) dat.N$wl.f <- factor(dat.N$wl) dat.N$wl.f2 <- factor(ifelse(dat.N$wl <= 7, "medium", "long"), levels=c("medium", "long")) dat.N$wl.f3 <- factor(ifelse(dat.N$wl >=8, "8-9", dat.N$wl), levels=c("6", "7", "8-9")) dat.N$ls.f <- round(dat.N$ls) dat.N$ls.f2 <- factor(ifelse(dat.N$ls <= 4, "close", "far"), levels=c("close", "far")) dat.N$sn.f <- factor(dat.N$sn) dat.N$wf.f <- round(dat.N$wf10) dat.N$wf.f2 <- factor(ifelse(dat.N$wf.c <= 0, "low", "high"), levels=c("high", "low")) # Descriptives nrow(dat.N) dcast(dat.N, value.var="flp", wl.f ~ sn.f, length) dcast(dat.N, value.var="flp", wl.f ~ sn.f, function(x) c(M=round(mean(x),1))) qplot(data=dat.N, x=ls, y=flp, geom="smooth", method="lm", formula=y~poly(x, 2)) + stat_summary (fun.y = mean, geom="point", colour="red", size=4, mapping = aes (x=ls.f)) qplot(data=dat.N, x=wl, y=flp, geom="smooth", method="lm", formula=y~poly(x, 1)) + stat_summary (fun.y = mean, geom="point", colour="red", size=4, mapping = aes (x=(wl))) dat.N$wl_r <- -1/dat.N$wl qplot(data=dat.N, x=wl_r, y=flp, geom="smooth", method="lm", formula=y~poly(x, 2)) + stat_summary (fun.y = mean, geom="point", colour="red", size=4, mapping = aes (x=(wl_r))) qplot(data=dat.N, x=wl, y=flp, geom="smooth", method="lm", formula=y~poly(x, 2), group= sn.f, colour = sn.f) + stat_summary (fun.y = mean, geom="point", mapping = aes (x=wl, group=sn.f, colour=sn.f) ) qplot(data=dat.N, x=wf10, y=flp, geom="smooth", method="lm", formula=y~poly(x, 2)) + stat_summary (fun.y = mean, geom="point", mapping = aes(x=round(wf.f)) ) # ...check frequencies items <- unique(interaction(dat.N$nsen)) dat.items <- dat.N[items, c("nsen", "wf", "wf10", "wl", "sn")] dat.items <- dat.items[order(dat.items$wf), ] table(cut(dat.N$wf, breaks=c(0, 1, 10, 10^2, 10^3, 10^4, 10^5))) sum(table(cut(dat.N$wf, breaks=c(0, 1, 10, 10^2, 10^3, 10^4, 10^5)))) # -------------------------------------------------------------------------------------------------------- # (1) FLP model: word length, launch site, suffixes, and word frequency # -------------------------------------------------------------------------------------------------------- # ... VCs and CPs print(summary(flp_69_02.a <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c + (1+wl.c+ls.c+sn.c+wf.c|nsub) + (1|nsen), data=dat.N, REML=FALSE)), corr=FALSE) # START 18 APRIL 2014 VarCorr(flp_69_02.a) chf <- matrix(0, nrow=5, ncol=5) (chf[lower.tri(chf, diag=TRUE)] <- getME(flp_69_02.a, "theta")[2:16]) sv <- svd(chf) round(sv$d^2/sum(sv$d^2)*100) # Model is degenerate print(summary(flp_69_02.b <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c + (1|nsub) + (1|nsen) + (0+wl.c|nsub) + (0+ls.c|nsub) + (0+sn.c|nsub) + (0+wf.c|nsub), data=dat.N, REML=FALSE)), corr=FALSE) anova(flp_69_02.b, flp_69_02.a) # Ensemble of correlation paramenters is not significant # ... drop1 VC at a time print(summary(flp_69_02.b.wl <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c + (1|nsub) + (1|nsen) + (0+ls.c|nsub) + (0+sn.c|nsub) + (0+wf.c|nsub), data=dat.N, REML=FALSE)), corr=FALSE) anova(flp_69_02.b.wl, flp_69_02.b) # p = .062 print(summary(flp_69_02.b.ls <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c + (1|nsub) + (1|nsen) + (0+wl.c|nsub) + (0+sn.c|nsub) + (0+wf.c|nsub), data=dat.N, REML=FALSE)), corr=FALSE) anova(flp_69_02.b.ls, flp_69_02.b) # p = .005 print(summary(flp_69_02.b.sn <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c + (1|nsub) + (1|nsen) + (0+wl.c|nsub) + (0+ls.c|nsub) + (0+wf.c|nsub), data=dat.N, REML=FALSE)), corr=FALSE) anova(flp_69_02.b.sn, flp_69_02.b) # p = 1 print(summary(flp_69_02.b.wf <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c + (1|nsub) + (1|nsen) + (0+wl.c|nsub) + (0+ls.c|nsub) + (0+sn.c|nsub), data=dat.N, REML=FALSE)), corr=FALSE) anova(flp_69_02.b.wf, flp_69_02.b) # p = .021 # ... remove VC for sn print(summary(flp_69_02.b2 <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c + (1|nsub) + (1|nsen) + (0+wl.c|nsub) + (0+ls.c|nsub) + (0+wf.c|nsub), data=dat.N, REML=FALSE)), corr=FALSE) anova(flp_69_02.b2, flp_69_02.a) # ... fixed effect structure print(summary(flp_69_02.c <- lmer(flp ~ wl.c + ls.c + sn.c + wf.c + (1|nsub) + (1|nsen) + (0+wl.c|nsub) + (0+ls.c|nsub) + (0+wf.c|nsub), data=dat.N, REML=FALSE)), corr=FALSE) anova(flp_69_02.c, flp_69_02.b2) # ... final model print(summary(flp_69_02.c <- lmer(flp ~ wl.c + ls.c + sn.c + wf.c + (1|nsub) + (1|nsen) + (0+wl.c|nsub) + (0+ls.c|nsub) + (0+wf.c|nsub), data=dat.N, REML=FALSE)), corr=FALSE) # 18 APRIL 2014; check of svd looks ok VarCorr(flp_69_02.c) chf2 <- diag(unname(getME(flp_69_02.c, "theta")[1:5]), 5) sv2 <- svd(chf2) round(sv2$d^2/sum(sv2$d^2)*100) # 04 May 2013, add subject-related random effect for morphological complexity to the final model, # ... as reqested by reviewer, because of theoretical relevance of morph. complexity print(summary(flp_69_02.c.x <- lmer(flp ~ wl.c + ls.c + sn.c + wf.c + (1|nsub) + (1|nsen) + (0+wl.c|nsub) + (0+ls.c|nsub) + (0+wf.c|nsub) + (0+sn.c|nsub), data=dat.N, REML=FALSE)), corr=FALSE) print(summary(flp_69_02.c.y1 <- lmer(flp ~ wl.c + ls.c + sn.c + wf.c + (1|nsub) + (1|nsen) + (0+wl.c|nsub) + (0+ls.c|nsub) + (0+wf.c|nsub) + (0+sn.c|nsub) + (0+sn.c|nsen), data=dat.N, REML=FALSE)), corr=FALSE) print(summary(flp_69_02.c.y2 <- lmer(flp ~ wl.c + ls.c + sn.c + wf.c + (1|nsub) + (1|nsen) + sn.c:wl.c + sn.c:ls.c + sn.c:wf.c + sn.c:wl.c:ls.c + sn.c:wl.c:wf.c + sn.c:ls.c:wf.c + sn.c:ls.c:wf.c:wl.c + (0+wl.c|nsub) + (0+ls.c|nsub) + (0+wf.c|nsub) + (0+sn.c|nsub) + (0+sn.c|nsen), data=dat.N, REML=FALSE)), corr=FALSE) anova(flp_69_02.c.y1, flp_69_02.c.y2) print(summary(flp_69_02.c.y <- lmer(flp ~ wl.c + ls.c + sn.c + wf.c + (1|nsub) + (1|nsen) + (0+wl.c|nsub) + (0+ls.c|nsub) + (0+wf.c|nsub) + (0+sn.c|nsub) + (0+sn.c|nsen), data=dat.N, REML=FALSE)), corr=FALSE) #mtable(flp_69_02.c, flp_69_02.c.x, flp_69_02.c.y) # START: 18 APRIL 2014; check of svd does NOT look ok: No variance of (0+sn.c|nsub) VarCorr(flp_69_02.c.y) chf3 <- diag(unname(getME(flp_69_02.c.y, "theta")[1:7]), 7) sv3 <- svd(chf3) round(sv3$d^2/sum(sv3$d^2)*100) # END: 18 APRIL 2014; keeping non-significant vc in the model is a bad idea if it makes the # model degenerate. I would choose the model w/o (0+sn.c|nsub) now; 6 vc's supported by svd # 2014 MAR06, add bigram freuquency, # ... as requested by reviewer, because of theoretical relevance of bigram frequency; # ... bigram frequency is not significant and does not change the other effects E2_bigram <- read.table("E2_bigram.txt") dat.N$bigram = 0 for (i in 1:nrow(dat.N)) { dat.N$bigram[i] = ifelse(dat.N$sn[i]==0, E2_bigram[as.numeric(as.character(dat.N$nsen[i])),2], E2_bigram[as.numeric(as.character(dat.N$nsen[i])),1]) } print(summary(flp_69_02.c.y.b1 <- lmer(flp ~ (wl.c + ls.c + sn.c + wf.c) * bigram + (1|nsub) + (1|nsen) + (0+wl.c|nsub) + (0+ls.c|nsub) + (0+wf.c|nsub) + (0+sn.c|nsub) + (0+sn.c|nsen), data=dat.N, REML=FALSE)), corr=FALSE) print(summary(flp_69_02.c.y.b2 <- lmer(flp ~ wl.c + ls.c + sn.c + wf.c + bigram + (1|nsub) + (1|nsen) + (0+wl.c|nsub) + (0+ls.c|nsub) + (0+wf.c|nsub) + (0+sn.c|nsub) + (0+sn.c|nsen), data=dat.N, REML=FALSE)), corr=FALSE) anova(flp_69_02.c.y, flp_69_02.c.y.b2, flp_69_02.c.y.b1) # ... using wl_r; 18 APRIL: added (0+sn.c | nsen); leads to a simpler RE model print(summary(flp_69_02.d <- lmer(flp ~ wl_r * ls.c * sn.c * wf.c + (1|nsub) + (1|nsen) + (0+wl_r|nsub) + (0+ls.c|nsub) + (0+wf.c|nsub) + (0+sn.c|nsub) + (0+sn.c|nsen), data=dat.N, REML=FALSE)), corr=FALSE) print(summary(flp_69_02.e <- lmer(flp ~ wl_r + ls.c + sn.c + wf.c + (1|nsub) + (1|nsen) + (0+wl_r|nsub) + (0+ls.c|nsub) + (0+wf.c|nsub) + (0+sn.c|nsub) + (0+sn.c|nsen), data=dat.N, REML=FALSE)), corr=FALSE) print(summary(flp_69_02.f <- lmer(flp ~ wl_r + ls.c + sn.c + wf.c + (1|nsub) + (1|nsen) + (0+ls.c|nsub) + (0+wf.c|nsub) + (0+sn.c|nsen), data=dat.N, REML=FALSE)), corr=FALSE) anova(flp_69_02.f, flp_69_02.e, flp_69_02.d) VarCorr(flp_69_02.f) chf4 <- diag(unname(getME(flp_69_02.f, "theta")[1:5]), 5) sv4 <- svd(chf4) round(sv4$d^2/sum(sv4$d^2)*100) # END: 18 APRIL 2014 # rename the model to use the following scripts flp_69_02.c <- flp_69_02.c.y # Residuals of LMM: Looks ok p0 <-qplot(x=fitted(flp_69_02.c), y=resid(flp_69_02.c), geom="point", shape=I("."), size=8, xlab="Fitted values", ylab="Standardized residuals") + geom_hline(yintercept=0) + theme_bw() p0 lattice::qqmath(resid(flp_69_02.c)) # Partial effect: wl ybreaks = seq(1, 7, 2) matrix(names(fixef(flp_69_02.c))) dat.N$DV.wl <- remef(flp_69_02.c, keep=TRUE, grouping=TRUE, fix=c(1, 2), ran=NULL) p1 <- qplot(data=dat.N, x = wl, y = DV.wl, xlab="Word length", ylab="Landing position", group=1, geom="smooth", method="lm") + geom_smooth(method=lm,color="black") + stat_summary (fun.y = mean, geom="point", colour="black", size=2, mapping = aes (x=wl) ) + stat_summary (fun.y = mean, geom="point", shape=2, size=3, mapping = aes (y=flp, x=wl) ) + coord_cartesian(ylim=c(0, 5)) p1 # Partial effect: ls dat.N$DV.ls <- remef(flp_69_02.c, keep=TRUE, grouping=TRUE, fix=c(1, 3), ran=NULL) p2 <- qplot(data=dat.N, x = ls, y = DV.ls, xlab="Launch site", ylab="Landing position", group=1, geom="smooth", method="lm") + geom_smooth(method=lm,color="black") + stat_summary (fun.y = mean, geom="point", colour="black", size=2, mapping = aes (x=ls.f) ) + stat_summary (fun.y = mean, geom="point", shape=2, size=3, mapping = aes (y=flp, x=ls.f) ) + coord_cartesian(ylim=c(0, 5)) p2 # Partial effect: sn dat.N$DV.sn <- remef(flp_69_02.c, keep=TRUE, grouping=TRUE, fix=c(1, 4), ran=NULL) p3 <- qplot(data=dat.N, x = sn, y = DV.sn, xlab="Suffixes", ylab="Landing position", group=1, geom="smooth", method="lm") + geom_smooth(method=lm,color="black") + stat_summary (fun.y = mean, geom="point", colour="black", size=2, mapping = aes (x=sn) ) + stat_summary (fun.y = mean, geom="point", shape=2, size=3, mapping = aes (y=flp, x=sn) ) + coord_cartesian(ylim=c(0, 5)) p3 # Partial effect: wf dat.N$DV.wf <- remef(flp_69_02.c, keep=TRUE, grouping=TRUE, fix=c(1, 5), ran=NULL) p4 <- qplot(data=dat.N, x = wf10, y = DV.wf, xlab="Word frequency", ylab="Landing position", group=1, geom="smooth", method="lm") + geom_smooth(method=lm,color="black") + stat_summary (fun.y = mean, geom="point", colour="black", size=2, mapping = aes (x=wf.f) ) + stat_summary (fun.y = mean, geom="point", shape=2, size=3, mapping = aes (y=flp, x=wf.f) ) + coord_cartesian(ylim=c(0, 5)) p4 #Assemble plots in one figure grid.newpage() pushViewport(viewport(layout = grid.layout(2, 2))) print(p1, vp=vplayout(1,1)) print(p2, vp=vplayout(1,2)) print(p3, vp=vplayout(2,1)) print(p4, vp=vplayout(2,2)) # ---------------------------------------------------------------------------------------------------------- # (2) Other eye-movement measures: FFD, GD, RFX, SKP # -------------------------------------------------------------------------------------------------------- # First-fixation duration (18 APRIL 2014, somewhat simplified) # -------------------------------------------------------------------------------------------------------- # ... full model print(summary(flp_69_02.a.ffd <- lmer(log(ffd) ~ wl.c * ls.c * sn.c * wf.c + (1+wl.c+ls.c+sn.c+wf.c|nsub) + (1|nsen), data=dat.N, REML=FALSE)), corr=FALSE) # ... zcp model print(summary(flp_69_02.z.ffd <- lmer(log(ffd) ~ wl.c * ls.c * sn.c * wf.c + (1+wl.c+ls.c+sn.c+wf.c||nsub) + (1|nsen), data=dat.N, REML=FALSE)), corr=FALSE) VarCorr(flp_69_02.z.ffd) chf_ffd <- diag(unname(getME(flp_69_02.a.ffd, "theta")[1:6]), 6) sv_ffd <- svd(chf_ffd) round(sv_ffd$d^2/sum(sv_ffd$d^2)*100) # ... reduce zcp model print(summary(flp_69_02.z2.ffd <- lmer(log(ffd) ~ wl.c * ls.c * sn.c * wf.c + (1+ls.c||nsub) + (1|nsen), data=dat.N, REML=FALSE)), corr=FALSE) print(summary(flp_69_02.b.ffd <- lmer(log(ffd) ~ wl.c * ls.c * sn.c * wf.c + (1|nsub) + (1|nsen), data=dat.N, REML=FALSE)), corr=FALSE) anova(flp_69_02.b.ffd, flp_69_02.z2.ffd, flp_69_02.z.ffd, flp_69_02.a.ffd) # ... fixed effect print(summary(flp_69_02.c.ffd <- lmer(log(ffd) ~ wl.c + ls.c + sn.c + wf.c + (1+ls.c|nsub) + (1|nsen), data=dat.N, REML=FALSE)), corr=FALSE) anova(flp_69_02.c.ffd, flp_69_02.z2.ffd) # Gaze duration # -------------------------------------------------------------------------------------------------------- # ... full model print(summary(flp_69_02.a.gd <- lmer(log(gd) ~ wl.c * ls.c * sn.c * wf.c + (1+wl.c+ls.c+sn.c+wf.c|nsub) + (1|nsen), data=dat.N, REML=FALSE)), corr=FALSE) # ... zcp model print(summary(flp_69_02.z.gd <- lmer(log(gd) ~ wl.c * ls.c * sn.c * wf.c + (1+wl.c+ls.c+sn.c+wf.c||nsub) + (1|nsen), data=dat.N, REML=FALSE)), corr=FALSE) VarCorr(flp_69_02.z.gd) chf_gd <- diag(unname(getME(flp_69_02.a.gd, "theta")[1:6]), 6) sv_gd <- svd(chf_gd) round(sv_gd$d^2/sum(sv_gd$d^2)*100) print(summary(flp_69_02.b.gd <- lmer(log(gd) ~ wl.c * ls.c * sn.c * wf.c + (1|nsub) + (1|nsen), data=dat.N, REML=FALSE)), corr=FALSE) anova(flp_69_02.b.gd, flp_69_02.z.gd, flp_69_02.a.gd) # ... fixed effect print(summary(flp_69_02.c.gd <- lmer(log(gd) ~ wl.c + ls.c + sn.c + wf.c + (1|nsub) + (1|nsen), data=dat.N, REML=FALSE)), corr=FALSE) anova(flp_69_02.c.gd, flp_69_02.b.gd) # Refixation # -------------------------------------------------------------------------------------------------------- # ... full model print(summary(flp_69_02.a.rfx <- glmer(rfx ~ wl.c * ls.c * sn.c * wf.c + (1+wl.c+ls.c+sn.c+wf.c|nsub) + (1|nsen), family=binomial(link="logit"), data=dat.N)), corr=FALSE) # ... no acceptable convergence # ... zcp model print(summary(flp_69_02.z.rfx <- glmer(rfx ~ wl.c * ls.c * sn.c * wf.c + (1+wl.c+ls.c+sn.c+wf.c||nsub) + (1|nsen), family=binomial(link="logit"), data=dat.N)), corr=FALSE) # ... no acceptable convergence VarCorr(flp_69_02.z.rfx) chf_rfx <- diag(unname(getME(flp_69_02.a.rfx, "theta")[1:5]), 5) sv_rfx <- svd(chf_rfx) round(sv_rfx$d^2/sum(sv_rfx$d^2)*100) print(summary(flp_69_02.b.rfx <- glmer(rfx ~ wl.c * ls.c * sn.c * wf.c + (1|nsub) + (1|nsen), family=binomial(link="logit"), data=dat.N)), corr=FALSE) # ... marginally acceptable convergence anova(flp_69_02.b.rfx, flp_69_02.z.rfx) # ... fixed effect print(summary(flp_69_02.c.rfx <- glmer(rfx ~ wl.c + ls.c + sn.c + wf.c + (1|nsub) + (1|nsen), family=binomial(link="logit"), data=dat.N)), corr=FALSE) # ... marginally acceptable convergence anova(flp_69_02.c.rfx, flp_69_02.b.rfx) # Skipping (note: launchsite is not available for skipped targets) # -------------------------------------------------------------------------------------------------------- dat.skp <- droplevels(subset(datraw, skp <= 1 & wn == 3)) # Center dat.skp$wl.c <- dat.skp$wl - 7.5 dat.skp$sn.c <- as.numeric(as.character(dat.skp$sn)) - 1.0 dat.skp$wf.c <- dat.skp$wf10 - mean(dat.skp$wf10) dat.skp$wf.f <- factor(ifelse(dat.skp$wf.c >= 0, "high", "low")) dat.skp$rl.c <- dat.skp$rl - mean(dat.skp$rl) # ... full model print(summary(flp_69_02.a.skp <- glmer(skp ~ wl.c * sn.c * wf.c + (1+wl.c+sn.c+wf.c|nsub) + (1|nsen), family=binomial(link="logit"), data=dat.skp)), corr=FALSE) # ... no acceptable convergence # ... zcp model print(summary(flp_69_02.z.skp <- glmer(rfx ~ wl.c * sn.c * wf.c + (1+wl.c+sn.c+wf.c||nsub) + (1|nsen), family=binomial(link="logit"), data=dat.skp)), corr=FALSE) # ... no acceptable convergence VarCorr(flp_69_02.z.skp) chf_skp <- diag(unname(getME(flp_69_02.a.rfx, "theta")[1:5]), 5) sv_skp <- svd(chf_skp) round(sv_skp$d^2/sum(sv_skp$d^2)*100) print(summary(flp_69_02.b.skp <- glmer(skp ~ wl.c*sn.c*wf.c + (1|nsub) + (1|nsen), family=binomial(link="logit"), data=dat.skp)), corr=FALSE) # ... marginally acceptable convergence anova(flp_69_02.b.skp, flp_69_02.z.skp) # ... fixed effect print(summary(flp_69_02.c.skp <- glmer(skp ~ wl.c + sn.c + wf.c + (1|nsub) + (1|nsen), family=binomial(link="logit"), data=dat.skp)), corr=FALSE) anova(flp_69_02.c.skp, flp_69_02.b.skp)