# Kliegl, R. & Bates, D.M. (submitted). International collaboration in psychology is on the rise. # # Look at lattice of all journals library(lattice) # Switch to black and white ltheme <- canonical.theme(color = FALSE) lattice.options(default.theme = ltheme) library(reshape) library(lme4, lib.loc = .libUsr) rm(list=ls()) load("psycol.rda") nrow(a) # N=26772 ############################ # Figure 1: Percentages; pooling articles, ignoring journals a2 <- melt(a, id=c("year","collab", "journal_name"), measure=c("nauthors")) a2$Year <- as.numeric(as.character(a2$year)) a2$NAuthor <- ifelse(a2$value <=3, a2$value, 4) a4 <- cast(a2, year ~ collab, length, add.missing=TRUE) a4$Count <- (a4[ ,2]+a4[ ,3]) a4$Percentage <- 100*a4[ ,3]/(a4[ ,2]+a4[ ,3]) a4$Year <- as.numeric(as.character(a4$year)) xyplot(Percentage ~ Year, data=a4, aspect="xy", #scales = list(y=list(log="e")), type = c("g", "p", "smooth"), xlab = "Year", ylab = "International articles [%]") # Probably better ... coef(lm(log(Percentage) ~ I(Year-1990), data=a4)) # (Intercept) I(Year - 1990) # 2.05942498 0.07425993 -> percentages grow at 7.4% curve(exp(2.09524708 + 0.07405819*x), -15, +17) ############################ # Table 1 # ... for 1975-2007 xx <- melt(a, id=c("journal_name", "year", "collab"), measure="nauthors") tab1 <- data.frame(cast(xx, journal_name ~ collab, length, margins="grand_col")) tab1$intperc <- round(tab1$TRUE./(tab1$FALSE.+tab1$TRUE.)*100) tab1 # ... for 2005-2007 yy <- melt(a[a$recyr==1,], id=c("journal_name", "year", "collab"), measure="nauthors") tab1a <- data.frame(cast(yy, journal_name ~ collab, length, margins="grand_col")) tab1a$intperc <- round(tab1a$TRUE./(tab1a$FALSE.+tab1a$TRUE.)*100) tab1a ############################ # Figure 2: Percentages; by journals a3 <- cast(a2, journal_name+year ~ collab, length) a3$Count <- (a3[ ,3]+a3[ ,4]) a3$Percentage <- 100*a3[ ,4]/(a3[ ,3]+a3[ ,4]) a3$Year <- as.numeric(as.character(a3$year)) xyplot(Percentage ~ Year | journal_name, data=a3, aspect="iso", type = c("g", "p", "smooth"), xlab = "Year", ylab = "International articles [%]") a3$Y1 <- ifelse(a3$Year < 1990, a3$Year - 1990, 0) a3$Y2 <- ifelse(a3$Year > 1990, a3$Year - 1990, 0) print(lme <- lmer(Percentage ~ Y1 + Y2 + (1|journal_name), data=a3), cor=FALSE) print(lme.Y2 <- lmer(Percentage ~ Y1 + Y2 + (1|journal_name) + (0+Y2|journal_name), data=a3), cor=FALSE) print(lme.Y1 <- lmer(Percentage ~ Y1 + Y2 + (1|journal_name) + (0+Y1|journal_name), data=a3), cor=FALSE) print(lme.Y2Y1 <- lmer(Percentage ~ Y1 + Y2 + (1|journal_name) + (0+Y2|journal_name) + (0+Y1|journal_name), data=a3), cor=FALSE) anova(lme, lme.Y1, lme.Y2Y1) # ... for Table 1 coef(lme.Y2Y1) ############################ # Figure 3: log(N) for national and international articles; # pooling articles, ignoring journals a4.1 <- cast(a2, collab + year ~ ., length) names(a4.1)[3] <- "N" a4.1$Year <- as.numeric(as.character(a4.1$year)) xyplot(N ~ Year, group=collab, data=a4.1, aspect="xy", type = c("g", "p", "smooth"), scales = list(y=list(log=10)), xlab = "Year", ylab = "log N of articles", auto.key=list( columns = 2, text = c("national articles", "international articles") ) ) ############################ # Figure 3a: raw counts over year xyplot(N ~ Year, group=collab, data=a4.1, type = c("g", "p", "smooth"), xlab = "Year", ylab = "N of articles") ############################ # Figure 3b: power function xyplot(N ~ Year, group=collab, data=a4.1, type = c("g", "p", "smooth"), scales = list(x=list(log=10), y=list(log=10)), xlab = "log Year", ylab = "log N of articles", auto.key=list( columns = 2, text = c("national articles", "international articles") ) ) ############################ # Figure 4: log(N) for national and international articles; by journals a3.1 <- cast(a2, journal_name+year+collab ~ ., length) names(a3.1)[4] <- "N" a3.1 <- a3.1[order(a3.1$journal, a3.1$year, a3.1$collab), ] a3.1$Year <- as.numeric(as.character(a3.1$year)) xyplot(N ~ Year|journal_name, group=collab, data=a3.1, aspect="fill", type = c("g", "p", "smooth"), scales = list(y=list(log=10)), xlab = "Year", ylab = "log N of articles", auto.key=list( columns = 2, text = c("national articles", "international articles") ) ) # ... lmer a3.1$logN <- log10(a3.1$N+1) # increment everything by 1 before taking log; alternative only zeroc a3.1$Collab <- as.factor(a3.1$collab) a3.1$YearC <- a3.1$Year-1990 # Center at 1990 print(lme1.ML <- lmer(logN ~ Collab*poly(YearC,1) + (1|journal_name), REML="TRUE", data=a3.1), cor=FALSE) print(lme2.ML <- lmer(logN ~ Collab*poly(YearC,2) + (1|journal_name), REML="TRUE", data=a3.1), cor=FALSE) anova(lme1.ML, lme2.ML) # no significant quadratic trends, but significant model improvement (AIC, not BIC) # ... ... refit with REML print(lme2 <- lmer(logN ~ Collab*poly(YearC,2) + (1|journal_name), data=a3.1), cor=FALSE) print(lme2.full <- lmer(logN ~ Collab*poly(YearC,2) + (Collab*poly(YearC,2)|journal_name), data=a3.1), cor=FALSE) anova(lme2, lme2.full) coef(lme2.full) qqmath(resid(lme2.full)) # not perfect ... ############################ # Figure 5: log(N) for national and international articles by N of authors check <- data.frame(cast(a2, year ~ collab + NAuthor, length)) a4.2 <- data.frame(cast(a2, year + collab + NAuthor ~ ., length)) names(a4.2)[4] <- "N" a4.2$NAuthor <- as.factor(a4.2$NAuthor) levels(a4.2$NAuthor) <- c("1 author", "2 authors", "3 authors", "4+ authors") a4.2$Collab <- as.factor(a4.2$collab) levels(a4.2$Collab) <- c("national articles", "international articles") a4.2$Year <- as.numeric(as.character(a4.2$year)) xyplot(N ~ Year|Collab, group=NAuthor, data=a4.2, aspect="xy", type = c("g", "p", "smooth"), scales = list(y=list(log=10)), xlab = "Year", ylab = "log N of articles", auto.key=list( columns = 4, text = c("1 author", "2 authors", "3 authors", "4+ authors") ) )