require(mgcv) require(methods) source("projectMarginal.R") # Load data load("DSdata.rda") # Remove outliner: data = data[data$word != "Beziehungen",] # Recode covariates to different metric # ... word length in terms of log2 data$ll <- log2(1/data$l) data$ll1 <- log2(1/data$l1) data$ll2 <- log2(1/data$l2) data$id <- as.factor(data$id); data$wid <- as.factor(data$wid); # ... probs of predictabilities data$prb <- exp(data$p)/(1+exp(data$p)) data$prb <- ifelse(data$prb==min(data$prb), 0, data$prb) # No perfectly predicted word was fixated data$prb1 <- exp(data$p1)/(1+exp(data$p1)) data$prb1 <- ifelse(data$prb1==min(data$prb1), 0, ifelse(data$prb1==max(data$prb1), 1, data$prb1)) data$prb2 <- exp(data$p2)/(1+exp(data$p2)) data$prb2 <- ifelse(data$prb2==min(data$prb2), 0, ifelse(data$prb2==max(data$prb2), 1, data$prb2)) data$s1s2 <- factor(paste(data$s1, data$s2, sep=""), labels=c("FF", "FT", "TF", "TT")) data$x0x1 <- factor(paste(data$x, data$x1, sep=""), levels=c("00", "10", "01", "11"), labels=c("CC", "FC", "CF", "FF")) # Select only FF: all_data <- data[data$s1s2 == "FF",]; # Define dimension of basis k = 10; k2 = k**2; n_cores=4; N_subj = 15; N_sample = 40; N_iter = 100; N_points = 64 post_m_f1 <- array(0, dim=c(N_iter, N_points)) post_m_f <- array(0, dim=c(N_iter, N_points)) post_m_f2 <- array(0, dim=c(N_iter, N_points)) post_m_f1f <- list(); post_m_ff2 <- list(); ten_m_f1 <- array(0, dim=c(N_iter, N_points)) ten_m_f <- array(0, dim=c(N_iter, N_points)) ten_m_f2 <- array(0, dim=c(N_iter, N_points)) ten_m_f1f <- list(); ten_m_ff2 <- list(); subjs <- sample(unique(all_data$id), N_subj) for (iteration in 1:N_iter) { print(sprintf("Iteration %i/%i", iteration, N_iter)) subset_data <- NULL for (subj in subjs) { subj_data <- all_data[(all_data$id==subj),] sample_idx = sample(1:dim(subj_data)[1], N_sample); subset_data <- rbind(subset_data, subj_data[sample_idx, ]) } subset_data$id <- as.factor(as.integer(subset_data$id)) newdata = list(ls=0, ll1=0, ll=0, ll2=0, prb1=0, prb=0, prb2=0, f1=0, f=0, f2=0, id=subjs[[1]], wid=subset_data$wid[1]); # Fit simple spline model to subset model_tps <- bam(ldur ~ s(ls, k=10) + s(ll1, k=5) + s(ll,k=5) + s(ll2,k=5) + s(f1, f, k=k2) + s(f, f2, k=k2) + s(id, bs="re") + s(wid, bs="re"), data=subset_data, cluster=n_cores) P_f1f <- projectMarginals(model_tps, "s(f1,f)", newdata, N_points); P_ff2 <- projectMarginals(model_tps, "s(f,f2)", newdata, N_points); post_m_f1[iteration,] <- P_f1f$Px; post_m_f[iteration,] <- P_f1f$Py + P_ff2$Px; post_m_f2[iteration,] <- P_ff2$Py; post_m_f1f[[iteration]] <- P_f1f$Pxy; post_m_ff2[[iteration]] <- P_ff2$Pxy; # Fit SS-ANOVA model to subset: fit_failed <- TRUE; model_ten <- NA; while(is.na(model_ten)) { try (model_ten <- bam(ldur ~ s(ls, k=10) + s(ll1,k=5) + s(ll,k=5) + s(ll2,k=5) + ti(f1, bs="cr", k=k) + ti(f, bs="cr", k=k) + ti(f2, bs="cr", k=k) + ti(f1, f, bs="cr", k=c(k,k)) + ti(f,f2, bs="cr", k=c(k,k)) + s(id, bs="re") + s(wid, bs="re"), data=subset_data, cluster=n_cores), silent=TRUE); if (is.na(model_ten)) { print("Fit failed -> retry.") subset_data <- NULL for (subj in subjs) { subj_data <- all_data[(all_data$id==subj),] sample_idx = sample(1:dim(subj_data)[1], N_sample); subset_data <- rbind(subset_data, subj_data[sample_idx, ]) } subset_data$id <- as.factor(as.integer(subset_data$id)) } } sample_f1 <- sampleData(model_ten, c("f1"), newdata, N_points); sample_f1$id <- as.factor(as.integer(sample_f1$id)) sample_f <- sampleData(model_ten, c("f"), newdata, N_points); sample_f $id <- as.factor(as.integer(sample_f$id)) sample_f2 <- sampleData(model_ten, c("f2"), newdata, N_points); sample_f2$id <- as.factor(as.integer(sample_f2$id)) sample_f1f <- sampleData(model_ten, c("f1", "f"), newdata, N_points); sample_f1f$id <- as.factor(as.integer(sample_f1f$id)) sample_ff2 <- sampleData(model_ten, c("f", "f2"), newdata, N_points); sample_ff2$id <- as.factor(as.integer(sample_ff2$id)) ten_m_f1[iteration,] <- predict.gam(model_ten, sample_f1, type="terms", se.fit=FALSE, terms=c("ti(f1)")) ten_m_f[iteration,] <- predict.gam(model_ten, sample_f, type="terms", se.fit=FALSE, terms=c("ti(f)")) ten_m_f2[iteration,] <- predict.gam(model_ten, sample_f2, type="terms", se.fit=FALSE, terms=c("ti(f2)")) ten_m_f1f[[iteration]] <- predict.gam(model_ten, sample_f1f, type="terms", se.fit=FALSE)[,"ti(f1,f)"] ten_m_ff2[[iteration]] <- predict.gam(model_ten, sample_ff2, type="terms", se.fit=FALSE)[,"ti(f,f2)"] } pdf(file=sprintf("example_compare_%s_%s.pdf", N_subj, N_sample), width=7, height=4) ylim = c(-0.15,0.15) layout(matrix(c(1,2,3,4,5,6), 2,3, byrow=T), widths=c(1.4,1.1,1), heights=c(1,1.3)) par(oma=c(1,1,1,1)) # # Plot tensor: # f1_mean <- apply(ten_m_f1, 2, mean); f1_std <- apply(ten_m_f1, 2, sd) f1_range <- range(f1_mean+f1_std, f1_mean-f1_std) f_mean <- apply(ten_m_f, 2, mean); f_std <- apply(ten_m_f, 2, sd) f_range <- range(f_mean+f_std, f_mean-f_std) f2_mean <- apply(ten_m_f2, 2, mean); f2_std <- apply(ten_m_f2, 2, sd) f2_range <- range(f2_mean+f2_std, f2_mean-f2_std) m_f1f <- matrix(0, N_points, N_points); m_ff2 <- matrix(0, N_points, N_points); v_f1f <- matrix(0, N_points, N_points); v_ff2 <- matrix(0, N_points, N_points); for (i in 1:N_iter) { m_f1f <- m_f1f + ten_m_f1f[[i]]/N_iter; m_ff2 <- m_ff2 + ten_m_ff2[[i]]/N_iter; } for (i in 1:N_iter) { v_f1f <- v_f1f + (ten_m_f1f[[i]]-m_f1f)**2; v_ff2 <- v_ff2 + (ten_m_ff2[[i]]-m_ff2)**2; } print(sprintf("Tensor Var f1: %s", sum(apply(ten_m_f1, 2, var)))) print(sprintf("Tensor Var f: %s", sum(apply(ten_m_f, 2, var)))) print(sprintf("Tensor Var f2: %s", sum(apply(ten_m_f2, 2, var)))) print(sprintf("Tensor Var f1,f: %s", sum(diag(v_f1f)))) print(sprintf("Tensor Var f,f2: %s", sum(diag(v_ff2)))) par(mar=c(1,4.1,0,1)) plot(sample_f1$f1, f1_mean, ylim=ylim, type="l", lty=1, ylab="log fix. dur.", xaxt="n"); lines(sample_f1$f1, f1_mean + f1_std, lty=2); lines(sample_f1$f1, f1_mean - f1_std, lty=2) par(mar=c(1,0,0,1)) plot(sample_f$f, f_mean, ylim=ylim, type="l", lty=1, ylab="", yaxt="n", xaxt="n"); lines(sample_f$f, f_mean + f_std, lty=2); lines(sample_f$f, f_mean - f_std, lty=2) par(mar=c(1,0,0,0)) plot(sample_f2$f2, f2_mean, ylim=ylim, type="l", lty=1, ylab="", yaxt="n", xaxt="n"); lines(sample_f2$f2, f2_mean + f2_std, lty=2); lines(sample_f2$f2, f2_mean - f2_std, lty=2) # # Plot Post-hoc # f1_mean <- apply(post_m_f1, 2, mean); f1_std <- apply(post_m_f1, 2, sd) f1_range <- range(f1_mean+f1_std, f1_mean-f1_std) f_mean <- apply(post_m_f, 2, mean); f_std <- apply(post_m_f, 2, sd) f_range <- range(f_mean+f_std, f_mean-f_std) f2_mean <- apply(post_m_f2, 2, mean); f2_std <- apply(post_m_f2, 2, sd) f2_range <- range(f2_mean+f2_std, f2_mean-f2_std) m_f1f <- matrix(0, N_points, N_points); m_ff2 <- matrix(0, N_points, N_points); v_f1f <- matrix(0, N_points, N_points); v_ff2 <- matrix(0, N_points, N_points); for (i in 1:N_iter) { dim(post_m_f1f[[i]]) <- c(N_points,N_points) dim(post_m_ff2[[i]]) <- c(N_points,N_points) m_f1f <- m_f1f + post_m_f1f[[i]]/N_iter; m_ff2 <- m_ff2 + post_m_ff2[[i]]/N_iter; } for (i in 1:N_iter) { v_f1f <- v_f1f + (post_m_f1f[[i]]-m_f1f)**2; v_ff2 <- v_ff2 + (post_m_ff2[[i]]-m_ff2)**2; } print(sprintf("Post-hoc Var f1: %s", sum(apply(post_m_f1, 2, var)))) print(sprintf("Post-hoc Var f: %s", sum(apply(post_m_f, 2, var)))) print(sprintf("Post-hoc Var f2: %s", sum(apply(post_m_f2, 2, var)))) print(sprintf("Post-hoc Var f1,f: %s", sum(diag(v_f1f)))) print(sprintf("Post-hoc Var f,f2: %s", sum(diag(v_ff2)))) par(mar=c(4.5,4.1,0,1)) plot(sample_f1$f1, f1_mean, ylim=ylim, type="l", lty=1, xlab="log freq. word N-1", ylab="log fix. dur."); lines(sample_f1$f1, f1_mean + f1_std, lty=2); lines(sample_f1$f1, f1_mean - f1_std, lty=2) par(mar=c(4.5,0,0,1)) plot(sample_f$f, f_mean, ylim=ylim, type="l", lty=1, xlab="log freq. word N", ylab="", yaxt="n"); lines(sample_f$f, f_mean + f_std, lty=2); lines(sample_f$f, f_mean - f_std, lty=2) par(mar=c(4.5,0,0,0)) plot(sample_f2$f2, f2_mean, ylim=ylim, type="l", lty=1, xlab="log freq. word N+1", ylab="", yaxt="n"); lines(sample_f2$f2, f2_mean + f2_std, lty=2); lines(sample_f2$f2, f2_mean - f2_std, lty=2) dev.off()