library(muhaz) kphaz.plot <- function(fit, minx=0, miny=0, maxx=0, maxy=0, step=FALSE, smooth=FALSE, points=FALSE, ...) { ######################################################################## # "kphaz.plot" is an S function plots the K-M-type hazard # function estimate generated by kphaz.fit. # Required Arguments: # fit: results of a call to kphaz.fit. # ...: additional arguments for the plot function ######################################################################## # # Make sure time and haz exist and are the same length # if(any(names(fit) == "time") & (any(names(fit) == "haz"))) { time <- fit$time haz <- fit$haz } else stop("Arguement \"fit\" must be the result of a call to \"kphaz.fit\"" ) # if(length(time) != length(haz)) stop( "Arguement \"fit\" must be the result of a call to \"kphaz.fit\"" ) # # Check to see if there are any strata # qstrata <- any(names(fit) == "strata") if(qstrata) strata <- fit$strata else strata <- rep(1, length(time)) if(length(strata) != length(haz)) stop( "Arguement \"fit\" must be the result of a call to \"kphaz.fit\"" ) # # Define, ustrata, the number of unique strata # ustrata <- unique(strata) good <- 1:length(ustrata) for(i in 1:length(ustrata)) { cur.strata <- ustrata[i] ind <- strata == ustrata[i] if(all(is.na(haz[ind] | is.nan(haz[ind])))) good <- good[good != i] } ustrata <- ustrata[good] # # Check to see if there are any plots to be made # if(length(ustrata) < 1) stop("No plots") # # Make the first plot # # Gary Feng: add maxx and maxy ind <- (strata == ustrata[1]) & (!is.nan(haz)) # xmax <- max(time) # ymax <- max(haz[((!is.nan(haz)) & (!is.na(haz)))]) if(missing(maxx) | maxx==0){ xmax <- max(time) } else { xmax <-maxx } if(missing(maxy) | maxy==0) { ymax <- max(haz[((!is.nan(haz)) & (!is.na(haz)))]) } else { ymax <- maxy } ## End Gary Feng x <- time[ind] y <- haz[ind] if(min(x) > 0) { x <- c(0, x) y <- c(0, y) } # Gary Feng: empty plot for now # plot(x, y, xlim = c(0, xmax), ylim = c(0, ymax), xlab = "Time", # ylab = "Hazard", type = "s", ...) # plot(x, y, xlim = c(minx, xmax), ylim = c(miny, ymax), xlab = "Time", ylab = "Hazard", type = "n", ...) # # Make any subsequent plots # # if(length(ustrata) > 1) { # for(i in 2:length(ustrata)) { if(length(ustrata) >= 1) { for(i in 1:length(ustrata)) { # ind <- strata == ustrata[i] ind <- (strata == ustrata[i]) & (!is.nan(haz)) & (!is.na(haz)) x <- time[ind] # Gary Feng: the function gives error message: # Error in stepfun(x, y) : 'y' must be one longer than 'x' # adding a zero at the beginning if (step==TRUE) { y <- c(0, haz[ind]) } else { y <- haz[ind] } if(min(x) > 0) { x <- c(0, x) y <- c(0, y) } # Gary Feng: use lines, not steps if (step==TRUE) { lines(stepfun(x, y), lty = i, col=i) } else { if (points) points(x,y,col=i) # Gary Feng: Smooth data if needed if (smooth==TRUE) { y<-smooth(y) } lines(x,y, col=i, lty = i) # lines(x,y, col="light gray", lty = 1) } # end Gary Feng } } # # Return # invisible() } # kphaz.plot(out, maxy=0.02, smooth=TRUE) ############################################### library(segmented) ############################################### # segmented Regression Analysis # # function( # dur: fixation duration vector # initpara: initial guesses of where the change points are # maxtime: end point of the hazard rate # mintime: starting point of the hazard rate # binwidth: force to round original data ############################################### segreg <-function(fixdur, initpara, maxtime=600, mintime=50, binwidth=1) { ################################### # estimate the hazard rate # bin width fixdur<-round(fixdur/binwidth)*binwidth status<-rep(1,length(fixdur)) out1<-kphaz.fit(fixdur,status) print(out1) ################################### # Now do the segmented regression ind<-(out1$time<=maxtime & out1$time>=mintime & !is.na(out1$haz)) d<-data.frame( x=out1$time[ind], y=out1$haz[ind]) h.lm<-lm(y~x,data=d) summary(h.lm) try(h.seg1<-segmented(h.lm,seg.Z=~x,psi=list(x=initpara),control=seg.control(display=FALSE,h=0.5)), silent=FALSE) traceback() #print(summary(h.seg1)) #print(slope(h.seg1)) ################# # plotting setup par(mfrow=c(1, 1)) plot(out1$time, out1$haz, , xlim=c(50,maxtime), ylim=c(-.005,0.03),xlab="Fixation Duration", ylab="Hazard rate/Prob. density",type="n") ################## # plotting ################## i=2 lines(out1$time, out1$hazraw, col="light gray") lines(d$x, smooth(d$y), col=1) plot(h.seg1, add=TRUE, col=i) lines(h.seg1, term="x", col=i, pch=1, k=i*3+10) # add pdf: use a large high limit for breakpoints, else the pdf will reflect the censoring. temp=fixdur[fixdur>0 & fixdur<2000] fixhist<-hist(temp, 10*(1:200), plot=FALSE) histtime<-c(0, fixhist$breaks[1:maxtime]) histpdf<-c(0, fixhist$density[1:maxtime]) lines(histtime,histpdf , col="pink") # add horizontal line at 0,0 lines(c(0,maxtime), c(0,0)) } # No plot segreg2 <-function(fixdur, initpara, maxtime=600, mintime=50, binwidth=20) { ################################### # estimate the hazard rate # bin width fixdur<-round(fixdur/binwidth)*binwidth status<-rep(1,length(fixdur)) out1<-kphaz.fit(fixdur,status) #print(out1) ################################### # Now do the segmented regression ind<-(out1$time<=maxtime & out1$time>=mintime & !is.na(out1$haz)) d<-data.frame( x=out1$time[ind], y=out1$haz[ind]) h.lm<-lm(y~x,data=d) summary(h.lm) print(h.lm) #try(h.seg1<-segmented(h.lm,seg.Z=~x,psi=list(x=initpara),control=seg.control(display=FALSE,h=0.5)), silent=FALSE) h.seg1<-segmented(h.lm,seg.Z=~x,psi=list(x=initpara),control=seg.control(display=FALSE,h=0.5)) traceback() } #library(foreign) #read.spss("study123456.sav") ->allfix # now all data are saved in rdata format load("study123456.rda") # testing, 1, 2, 3 fixdur=sample(allfix$dur[!is.na(allfix$dur) & allfix$dur>1 & allfix$dur<1000], 50000, replace=TRUE, prob=NULL) segreg2(fixdur, initpara=c(180)) ############### # bootstrapping # k= # of samples, n=sample size, alpha=sig level ############### results<-NULL initpara=c(100, 180, 250) maxtime=600 mintime=50 binwidth=20 k=10000; n=5000 for(i in 1:k) { # sample fixdur=sample(allfix$dur[!is.na(allfix$dur)], 50000, replace=TRUE, prob=NULL) # get haz fixdur<-round(fixdur/binwidth)*binwidth status<-rep(1,length(fixdur)) out1<-kphaz.fit(fixdur,status) # seg reg ind<-(out1$time<=maxtime & out1$time>=mintime & !is.na(out1$haz)) d<-data.frame( x=out1$time[ind], y=out1$haz[ind]) h.lm<-lm(y~x,data=d) h.seg1<-NULL try(h.seg1<-segmented(h.lm,seg.Z=~x,psi=list(x=initpara),control=seg.control(display=FALSE,h=0.5)), silent=FALSE) #try(print(summary(h.seg1))) #try(print(slope(h.seg1))) results<-c(results, list(h.seg1)) } save (results, file="bootstrapping10k.rdata") p1<-NULL; p2<-NULL; p3<-NULL; se1<-NULL; se2<-NULL; se3<-NULL; for(i in 1:k) { try(p1[i]<-results[[i]]$"psi"[[1,2]]) try(p2[i]<-results[[i]]$"psi"[[2,2]]) try(p3[i]<-results[[i]]$"psi"[[3,2]]) try(se1[i]<-results[[i]]$"psi"[[1,3]]) try(se2[i]<-results[[i]]$"psi"[[2,3]]) try(se3[i]<-results[[i]]$"psi"[[3,3]]) } ######################## # bootstrapping ######################## bootstat<-function(x,alpha=0.05){ x=x[!is.na(x) & !is.null(x)] meanboot=mean(x) cat("Mean Bootstrap Sample :",meanboot,"\n") varboot=var(x) seboot=sqrt(varboot) cat("bootstrap standard error =",seboot,"\n") cat("\n") lobound=quantile(x,(alpha/2)) upbound=quantile(x,(1-(alpha/2))) cat(((1-alpha)*100),"% Confidence Interval for Mean Statistic:","\n") cat("lower bound =",lobound,"\n") cat("upper bound =",upbound,"\n") hist(x, breaks=50) } bootstat(p1) bootstat(p2) bootstat(p3) #################################### # PART II: can segreg detect changepoints in hazard function? k=1000; m=5000; segregmc<-NULL a=1/2; b=1; d=1; for(i in 1:k) { u<-runif(m) # this is the inversed CDF of the piecewise linear hazard function, # with the first slope a, the 2nd slope b, # changepoint at d. v<-ifelse(u<=1-(exp(-a^2*d^2)), sqrt(log(1/(1-u)))/a, ((b^2-a^2)*d+sqrt(a^4*d^2-(a*b*d)^2-b^2*log(1-u)))/b^2) # plot(v, u) # now estimate the hazard function; discretize to speed up a bit w<-round(v*100)/100 status<-rep(1,length(w)) out<-kphaz.fit(w,status) #kphaz.plot(out, maxy=10, smooth=TRUE) # drop the hazard tail maxtime=2; mintime=0; ind<-(out$time<=maxtime & out$time>=mintime & !is.na(out$haz)) da<-data.frame( x=out$time[ind], y=out$haz[ind]) h.lm<-lm(y~x,data=da) h.seg1<-NULL # use a randomized initial value of 1 +/- 1/3. initpara=0.75+runif(1)/2 try(h.seg1<-segmented(h.lm,seg.Z=~x,psi=list(x=initpara),control=seg.control(display=FALSE,h=0.5)), silent=FALSE) #try(print(summary(h.seg1))) #try(print(slope(h.seg1))) segregmc<-c(segregmc, list(h.seg1)) } save (segregmc, file="segregmc10k.rdata") p1<-NULL; se1<-NULL; for(i in 1:k) { try(p1[i]<-segregmc[[i]]$"psi"[[1,2]]) try(se1[i]<-segregmc[[i]]$"psi"[[1,3]]) } bootstat(p1) bootstat(se1) i=2 # lines(out$time, out$hazraw, col="light gray") lines(d$x, smooth(d$y), col=1) plot(h.seg1, add=TRUE, col=i) lines(h.seg1, term="x", col=i, pch=1, k=i*3+10) #################################### # PART III: Jittering # Dirk Vorberg argued in his review that jittering will mess up the # hazard function and implied that the segmented regression will # not work. Let's test it with the piecewise Weibull with jittering k=1000; m=5000; jitter<-NULL a=1/2; b=1; jitter_sd=0.2; #<-- 95% CI gives 0.6-1.4, pretty large # now let d be a random normal variable d=rnorm(m,1,jitter_sd); for(i in 1:k) { u<-runif(m) # this is the inversed CDF of the piecewise linear hazard function, # with the first slope a, the 2nd slope b, # changepoint at d. v<-ifelse(u<=1-(exp(-a^2*d^2)), sqrt(log(1/(1-u)))/a, ((b^2-a^2)*d+sqrt(a^4*d^2-(a*b*d)^2-b^2*log(1-u)))/b^2) # plot(v, u) # now estimate the hazard function; discretize to speed up a bit w<-round(v*100)/100 status<-rep(1,length(w)) out<-kphaz.fit(w,status) if(k==1) kphaz.plot(out, maxy=4, maxx=3, smooth=TRUE) # drop the hazard tail maxtime=2; mintime=0; ind<-(out$time<=maxtime & out$time>=mintime & !is.na(out$haz)) da<-data.frame( x=out$time[ind], y=out$haz[ind]) h.lm<-lm(y~x,data=da) h.seg1<-NULL # use a randomized initial value of 1 +/- 1/3. initpara=0.75+runif(1)/2 try(h.seg1<-segmented(h.lm,seg.Z=~x,psi=list(x=initpara),control=seg.control(display=FALSE,h=0.5)), silent=FALSE) try(print(summary(h.seg1))) try(print(slope(h.seg1))) jitter<-c(jitter, list(h.seg1)) } save (jitter, file="jitter.rdata") p1<-NULL; se1<-NULL; for(i in 1:k) { try(p1[i]<-jitter[[i]]$"psi"[[1,2]]) try(se1[i]<-jitter[[i]]$"psi"[[1,3]]) } bootstat(p1) bootstat(se1) i=2 # lines(out$time, out$hazraw, col="light gray") lines(d$x, smooth(d$y), col=1) plot(h.seg1, add=TRUE, col=i) lines(h.seg1, term="x", col=i, pch=1, k=i*3+10)