##### Potsdam Mind Research Repository ######

## This script is part of the publication package for the article:

# Hohenstein, S., & Kliegl, R. (2013).
# Eye movements reveal interplay between noun capitalization and word class during reading.
# In M. Knauff, M. Pauen, N. Sebanz, & I. Wachsmuth (Eds.), Proceedings of the 35th Annual 
# Conference of the Cognitive Science Society (pp. 2554-2559). 
# Austin, TX: Cognitive Science Society.
# http://csjarchive.cogsci.rpi.edu/Proceedings/2013/papers/0462/


# Sven Hohenstein, 2013


#### Analyses and graphical presentation ####

# NOTE: the global workspace is deliberately NOT cleared here.
# `rm(list = ls())` would silently delete every object of the calling session
# when this script is source()d. Nothing below relies on an empty workspace
# (all objects are created before they are used); run the script in a fresh
# R session if a clean environment is required.

# load packages
library(lme4)
library(plyr)
library(ggplot2)

# additional functions
source("remef.v0.6.10.R")
# Mean-center `x` without rescaling it.
# Delegates to scale(), so the result is a matrix that carries the subtracted
# mean(s) in its "scaled:center" attribute.
center <- function(x) {
  scale(x, center = TRUE, scale = FALSE)
}


### Corpus analyses

## load data
# `dat`: main data set for the corpus analyses (read from the working directory)
dat <- readRDS(file = "data.rds")
# `dat.reg`: data set for the regression-probability analysis
dat.reg <- readRDS(file = "data.reg.rds")

## the columns include:
# id - subject id
# sn - sentence id
# cap - capitalization condition (factor)
# CapT - ~ (treatment contrast)
# noun1 - word class of previous word (factor)
# Noun1N - ~ (repeated contrast)
# noun2 - ...next word
# Noun2N - ...
# noun - ...current word
# NounN -...
# gd - gaze duration
# ffd - first-fixation duration
# sfd - single-fixation duration
# f - log frequency of the current word
# lr - length of the current word (reciprocal)
# f2p - log frequency of the next word
# l2r - length of the next word (reciprocal)
# f1 - log frequency of the previous word
# l1r - length of the previous word (reciprocal)
# o - landing position (relative)
# ao - outgoing saccade amplitude
# ao1 - incoming saccade amplitude
# rfx - refixation (boolean)
# s1 - skipping (boolean)
# corr - correctness of answer to comprehension question (NA means: no question)
# reg - regression probability


## main analyses
# Gaze duration: linear mixed model of log(gd).
# Fixed effects: centered frequency / reciprocal-length covariates of the
#   current, next, and previous word; a quadratic polynomial of landing
#   position; centered outgoing and incoming saccade amplitude; CapT, the
#   word-class contrasts (NounN, Noun2N, Noun1N) and their CapT interactions.
# Random effects: intercepts and CapT slopes for subjects (id) and sentences
#   (sn), entered as separate terms.
# The order of the terms is kept as is so that coefficient labels
# (e.g. "CapT:Noun1N") stay unchanged.
M.gd <- lmer(
  formula = log(gd) ~
    center(f) + center(lr) +
    center(f2p) + center(l2r) +
    center(f1) + center(l1r) +
    poly(o, 2) + center(ao) + center(ao1) +
    CapT:NounN + NounN +
    CapT:Noun2N + Noun2N +
    CapT:Noun1N + Noun1N +
    CapT +
    (1 | id) + (1 | sn) +
    (0 + CapT | id) + (0 + CapT | sn),
  data = dat
)
summary(M.gd)

# Control analysis (Review): refit the gaze-duration model without trials in
# which the comprehension question was answered incorrectly. Rows are kept
# when corr equals 1 or when corr is NA (no question was asked).
# The fitted model is only summarised, not stored.
summary(
  lmer(
    formula = log(gd) ~
      center(f) + center(lr) +
      center(f2p) + center(l2r) +
      center(f1) + center(l1r) +
      poly(o, 2) + center(ao) + center(ao1) +
      CapT:NounN + NounN +
      CapT:Noun2N + Noun2N +
      CapT:Noun1N + Noun1N +
      CapT +
      (1 | id) + (1 | sn) +
      (0 + CapT | id) + (0 + CapT | sn),
    data = dat,
    subset = corr == 1 | is.na(corr)
  )
)

# First-fixation duration: linear mixed model of log(ffd) with the same
# fixed- and random-effects structure as the gaze-duration model
# (covariates, landing position, saccade amplitudes, CapT x word class;
# intercepts and CapT slopes for id and sn).
M.ffd <- lmer(
  formula = log(ffd) ~
    center(f) + center(lr) +
    center(f2p) + center(l2r) +
    center(f1) + center(l1r) +
    poly(o, 2) + center(ao) + center(ao1) +
    CapT:NounN + NounN +
    CapT:Noun2N + Noun2N +
    CapT:Noun1N + Noun1N +
    CapT +
    (1 | id) + (1 | sn) +
    (0 + CapT | id) + (0 + CapT | sn),
  data = dat
)
summary(M.ffd)

# Single-fixation duration: linear mixed model of log(sfd) with the same
# fixed- and random-effects structure as the gaze-duration model.
M.sfd <- lmer(
  formula = log(sfd) ~
    center(f) + center(lr) +
    center(f2p) + center(l2r) +
    center(f1) + center(l1r) +
    poly(o, 2) + center(ao) + center(ao1) +
    CapT:NounN + NounN +
    CapT:Noun2N + Noun2N +
    CapT:Noun1N + Noun1N +
    CapT +
    (1 | id) + (1 | sn) +
    (0 + CapT | id) + (0 + CapT | sn),
  data = dat
)
summary(M.sfd)

# Refixation probability: generalized linear mixed model (binomial family)
# of the boolean rfx, with the same predictors and random effects as the
# duration models.
M.rfx <- glmer(
  formula = rfx ~
    center(f) + center(lr) +
    center(f2p) + center(l2r) +
    center(f1) + center(l1r) +
    poly(o, 2) + center(ao) + center(ao1) +
    CapT:NounN + NounN +
    CapT:Noun2N + Noun2N +
    CapT:Noun1N + Noun1N +
    CapT +
    (1 | id) + (1 | sn) +
    (0 + CapT | id) + (0 + CapT | sn),
  data = dat,
  family = binomial
)
summary(M.rfx)

# Outgoing saccade amplitude: linear mixed model of ao (untransformed).
# Because ao is the response here, center(ao) is not among the predictors;
# all other terms match the duration models.
M.ao <- lmer(
  formula = ao ~
    center(f) + center(lr) +
    center(f2p) + center(l2r) +
    center(f1) + center(l1r) +
    poly(o, 2) + center(ao1) +
    CapT:NounN + NounN +
    CapT:Noun2N + Noun2N +
    CapT:Noun1N + Noun1N +
    CapT +
    (1 | id) + (1 | sn) +
    (0 + CapT | id) + (0 + CapT | sn),
  data = dat
)
summary(M.ao)

# Skipping probability: generalized linear mixed model (binomial family)
# of the boolean s1, with the same predictors and random effects as the
# duration models.
M.skip <- glmer(
  formula = s1 ~
    center(f) + center(lr) +
    center(f2p) + center(l2r) +
    center(f1) + center(l1r) +
    poly(o, 2) + center(ao) + center(ao1) +
    CapT:NounN + NounN +
    CapT:Noun2N + Noun2N +
    CapT:Noun1N + Noun1N +
    CapT +
    (1 | id) + (1 | sn) +
    (0 + CapT | id) + (0 + CapT | sn),
  data = dat,
  family = binomial
)
summary(M.skip)

# Regression probability: generalized linear mixed model (binomial family)
# of reg, fitted to the separate data set dat.reg.
# Only CapT, the word-class contrasts of the current and the next word, and
# their CapT interactions enter as fixed effects (no covariates, no
# previous-word terms); random effects as in the other models.
M.reg <- glmer(
  formula = reg ~
    CapT:NounN + NounN +
    CapT:Noun2N + Noun2N +
    CapT +
    (1 | id) + (1 | sn) +
    (0 + CapT | id) + (0 + CapT | sn),
  data = dat.reg,
  family = binomial
)
summary(M.reg)

# Landing position (relative): linear mixed model of o.
# Because o is the response here, poly(o, 2) is not among the predictors;
# all other terms match the duration models.
# `subset = as.logical(ffd)` keeps rows with a non-zero ffd; a missing ffd
# gives NA in the subset vector.
# NOTE(review): presumably meant to restrict the model to fixated words -
# confirm.
M.o <- lmer(
  formula = o ~
    center(f) + center(lr) +
    center(f2p) + center(l2r) +
    center(f1) + center(l1r) +
    center(ao) + center(ao1) +
    CapT:NounN + NounN +
    CapT:Noun2N + Noun2N +
    CapT:Noun1N + Noun1N +
    CapT +
    (1 | id) + (1 | sn) +
    (0 + CapT | id) + (0 + CapT | sn),
  data = dat,
  subset = as.logical(ffd)
)
summary(M.o)


## Reading speed (words per minute)

# load the reading-speed data set
dat.wpm <- readRDS(file = "data.wpm.rds")

# Analysis: linear mixed model of wpm with CapT as the only fixed effect and
# intercepts plus CapT slopes for subjects (id) and sentences (sn).
M.wpm <- lmer(
  formula = (wpm) ~
    CapT +
    (1 | id) + (1 | sn) +
    (0 + CapT | id) + (0 + CapT | sn),
  data = dat.wpm
)
summary(M.wpm)


### Plot for main corpus analysis (gaze duration)

# 1) remove effects not associated with noun and capitalization
#    for the previous, the current, and the next word separately
#
# dat.gd2 stacks three copies of the rows of `dat` that have a gaze duration.
# Each copy receives the remef() output of M.gd for one word position (with
# the intercept, that position's word-class contrast, CapT, and their
# interaction passed as `fix`), plus a `word` label naming the position.
# NOTE(review): this assumes remef() returns one value per row with
# non-missing gd, i.e. that M.gd dropped no further rows because of missing
# predictors - confirm.
dat.gd2 <- cbind(
  do.call(rbind, replicate(3, dat[!is.na(dat$gd), ], simplify = FALSE)),
  DV = c(
    remef(M.gd, fix = c(1, "Noun1N", "CapT", "CapT:Noun1N"), keep = TRUE),
    remef(M.gd, fix = c(1, "NounN", "CapT", "CapT:NounN"), keep = TRUE),
    remef(M.gd, fix = c(1, "Noun2N", "CapT", "CapT:Noun2N"), keep = TRUE)
  ),
  word = rep(
    c("Previous word", "Current word", "Next word"),
    each = sum(!is.na(dat$gd))
  )
)
# Word class matching each copy: unlist() concatenates the columns in the
# order noun1, noun, noun2, i.e. previous, current, next word.
dat.gd2$noun <- unlist(
  dat[!is.na(dat$gd), c("noun1", "noun", "noun2")],
  use.names = FALSE
)

# 2) aggregate data: mean (M) and standard error (SE) of DV for every
#    combination of capitalization condition, word class, and word position
dat.gd2.agg <- ddply(
  .data = dat.gd2,
  .variables = .(cap, noun, word),
  .fun = summarise,
  M = mean(DV),
  SE = sd(DV) / sqrt(length(DV))
)
# order the panels as previous -> current -> next word
dat.gd2.agg$word <- factor(
  dat.gd2.agg$word,
  levels = c("Previous word", "Current word", "Next word"),
  ordered = TRUE
)

# 3) create a combined graphic
# The plotted means are on the log scale; the y axis places its breaks at
# log(ybreaks) and labels them with the corresponding values in ms.
ybreaks <- seq(100, 500, 10)
p <- ggplot(dat.gd2.agg, aes(x = noun, y = M, group = cap)) +
  # error bars of +/- 1 SE without horizontal caps
  geom_errorbar(aes(ymin = M - SE, ymax = M + SE), width = 0.0) +
  geom_line(aes(linetype = cap), size = 1) +
  geom_point(aes(shape = cap), size = 3) +
  scale_y_continuous(
    name = "Gaze duration [ms]",
    breaks = log(ybreaks),
    labels = ybreaks
  ) +
  scale_x_discrete(name = "Word class") +
  # identical titles for linetype and shape, each with the same key size
  scale_linetype("Presentation") +
  scale_shape_manual("Presentation", values = c(16, 0)) +
  guides(
    shape = guide_legend(keywidth = 2, keyheight = 1),
    linetype = guide_legend(keywidth = 2, keyheight = 1)
  ) +
  # one panel per word position, stacked vertically
  facet_wrap(~ word, ncol = 1) +
  theme_bw() +
  theme(
    legend.position = "top",
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.key = element_blank(),
    strip.text.x = element_text(size = 14)
  )
p