R version 2.12.0 (2010-10-15) Copyright (C) 2010 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i486-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > x <- array(list(0,210907,0,2,0,149061,0,0,0,237213,1,0,0,133131,1,4,0,324799,1,0,0,230964,0,-1,0,236785,1,0,0,344297,1,1,0,174724,1,0,0,174415,1,3,0,223632,1,-1,0,294424,0,4,0,325107,1,3,0,106408,0,1,0,96560,0,0,0,265769,1,-2,0,149112,0,-4,0,152871,0,2,0,362301,1,2,0,183167,0,-4,0,218946,1,2,0,244052,1,2,0,341570,1,0,0,196553,1,-3,0,143246,0,2,0,143756,0,4,0,152299,1,2,0,193339,1,2,0,130585,0,-4,0,112611,1,3,0,148446,1,3,0,182079,0,2,0,243060,1,-1,0,162765,1,-3,0,85574,1,0,0,225060,0,1,0,133328,1,-3,0,100750,1,3,0,101523,1,0,0,243511,1,0,0,152474,1,0,0,132487,1,3,0,317394,0,-3,0,244749,1,0,0,128423,0,2,0,97839,0,-1,1,229242,1,2,1,324598,0,2,1,195838,0,-2,1,254488,0,0,1,92499,1,-2,1,224330,0,0,1,181633,1,6,1,271856,1,-3,1,95227,1,3,1,98146,0,0,1,118612,0,-2,1,65475,1,1,1,108446,0,0,1,121848,0,2,1,76302,1,2,1,98104,0,-3,1,30989,1,-2,1,31774,0,1,1,150580,1,-4,1,59382,0,1,1,84105,0,0),dim=c(4,67),dimnames=list(c('pop','time_in_rfc','gender','total_tests'),1:67)) > y <- array(NA,dim=c(4,67),dimnames=list(c('pop','time_in_rfc','gender','total_tests'),1:67)) > for (i in 1:dim(x)[1]) + { + for (j in 1:dim(x)[2]) + { + y[i,j] <- as.numeric(x[i,j]) + } + } > par4 = 'no' > par3 = '3' > par2 = 'none' > par1 = '2' > library(party) Loading required package: survival Loading required package: splines Loading required package: grid Loading required package: modeltools Loading required package: stats4 Loading required package: coin Loading required package: mvtnorm Loading required package: zoo Loading required package: sandwich Loading required package: strucchange Loading required package: vcd Loading required package: MASS Loading required package: colorspace > library(Hmisc) Attaching package: 'Hmisc' The following object(s) are masked from 'package:survival': untangle.specials The following object(s) are masked from 'package:base': format.pval, round.POSIXt, trunc.POSIXt, units > par1 <- as.numeric(par1) > par3 <- as.numeric(par3) > x <- data.frame(t(y)) > is.data.frame(x) [1] TRUE > x <- x[!is.na(x[,par1]),] > k <- length(x[1,]) > n <- length(x[,1]) > colnames(x)[par1] [1] "time_in_rfc" > x[,par1] [1] 210907 149061 237213 133131 324799 230964 236785 344297 174724 174415 [11] 223632 294424 325107 106408 96560 265769 149112 152871 362301 183167 [21] 218946 244052 341570 196553 143246 143756 152299 193339 130585 112611 [31] 148446 182079 243060 162765 85574 225060 133328 100750 101523 243511 [41] 152474 132487 317394 244749 128423 97839 229242 324598 195838 254488 [51] 92499 224330 181633 271856 95227 98146 118612 65475 108446 121848 [61] 76302 98104 30989 31774 150580 59382 84105 > if (par2 == 'kmeans') { + cl <- kmeans(x[,par1], par3) + print(cl) + clm <- matrix(cbind(cl$centers,1:par3),ncol=2) + clm <- clm[sort.list(clm[,1]),] + for (i in 1:par3) { + cl$cluster[cl$cluster==clm[i,2]] <- paste('C',i,sep='') + } + cl$cluster <- as.factor(cl$cluster) + print(cl$cluster) + x[,par1] <- cl$cluster + } > if (par2 == 'quantiles') { + x[,par1] <- cut2(x[,par1],g=par3) + } > if (par2 == 'hclust') { + hc <- hclust(dist(x[,par1])^2, 'cen') + print(hc) + memb <- cutree(hc, k = par3) + dum <- c(mean(x[memb==1,par1])) + for (i in 2:par3) { + dum <- c(dum, mean(x[memb==i,par1])) + } + hcm <- matrix(cbind(dum,1:par3),ncol=2) + hcm <- hcm[sort.list(hcm[,1]),] + for (i in 1:par3) { + memb[memb==hcm[i,2]] <- paste('C',i,sep='') + } + memb <- as.factor(memb) + print(memb) + x[,par1] <- memb + } > if (par2=='equal') { + ed <- cut(as.numeric(x[,par1]),par3,labels=paste('C',1:par3,sep='')) + x[,par1] <- as.factor(ed) + } > table(x[,par1]) 30989 31774 59382 65475 76302 84105 85574 92499 95227 96560 97839 1 1 1 1 1 1 1 1 1 1 1 98104 98146 100750 101523 106408 108446 112611 118612 121848 128423 130585 1 1 1 1 1 1 1 1 1 1 1 132487 133131 133328 143246 143756 148446 149061 149112 150580 152299 152474 1 1 1 1 1 1 1 1 1 1 1 152871 162765 174415 174724 181633 182079 183167 193339 195838 196553 210907 1 1 1 1 1 1 1 1 1 1 1 218946 223632 224330 225060 229242 230964 236785 237213 243060 243511 244052 1 1 1 1 1 1 1 1 1 1 1 244749 254488 265769 271856 294424 317394 324598 324799 325107 341570 344297 1 1 1 1 1 1 1 1 1 1 1 362301 1 > colnames(x) [1] "pop" "time_in_rfc" "gender" "total_tests" > colnames(x)[par1] [1] "time_in_rfc" > x[,par1] [1] 210907 149061 237213 133131 324799 230964 236785 344297 174724 174415 [11] 223632 294424 325107 106408 96560 265769 149112 152871 362301 183167 [21] 218946 244052 341570 196553 143246 143756 152299 193339 130585 112611 [31] 148446 182079 243060 162765 85574 225060 133328 100750 101523 243511 [41] 152474 132487 317394 244749 128423 97839 229242 324598 195838 254488 [51] 92499 224330 181633 271856 95227 98146 118612 65475 108446 121848 [61] 76302 98104 30989 31774 150580 59382 84105 > if (par2 == 'none') { + m <- ctree(as.formula(paste(colnames(x)[par1],' ~ .',sep='')),data = x) + } > > #Note: the /var/www/rcomp/createtable file can be downloaded at http://www.wessa.net/cretab > load(file="/var/www/rcomp/createtable") > > if (par2 != 'none') { + m <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data = x) + if (par4=='yes') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'10-Fold Cross Validation',3+2*par3,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + a<-table.element(a,'Prediction (training)',par3+1,TRUE) + a<-table.element(a,'Prediction (testing)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Actual',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + a<-table.row.end(a) + for (i in 1:10) { + ind <- sample(2, nrow(x), replace=T, prob=c(0.9,0.1)) + m.ct <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data =x[ind==1,]) + if (i==1) { + m.ct.i.pred <- predict(m.ct, newdata=x[ind==1,]) + m.ct.i.actu <- x[ind==1,par1] + m.ct.x.pred <- predict(m.ct, newdata=x[ind==2,]) + m.ct.x.actu <- x[ind==2,par1] + } else { + m.ct.i.pred <- c(m.ct.i.pred,predict(m.ct, newdata=x[ind==1,])) + m.ct.i.actu <- c(m.ct.i.actu,x[ind==1,par1]) + m.ct.x.pred <- c(m.ct.x.pred,predict(m.ct, newdata=x[ind==2,])) + m.ct.x.actu <- c(m.ct.x.actu,x[ind==2,par1]) + } + } + print(m.ct.i.tab <- table(m.ct.i.actu,m.ct.i.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.i.tab[i,i] / sum(m.ct.i.tab[i,])) + numer <- numer + m.ct.i.tab[i,i] + } + print(m.ct.i.cp <- numer / sum(m.ct.i.tab)) + print(m.ct.x.tab <- table(m.ct.x.actu,m.ct.x.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.x.tab[i,i] / sum(m.ct.x.tab[i,])) + numer <- numer + m.ct.x.tab[i,i] + } + print(m.ct.x.cp <- numer / sum(m.ct.x.tab)) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (jjj in 1:par3) a<-table.element(a,m.ct.i.tab[i,jjj]) + a<-table.element(a,round(m.ct.i.tab[i,i]/sum(m.ct.i.tab[i,]),4)) + for (jjj in 1:par3) a<-table.element(a,m.ct.x.tab[i,jjj]) + a<-table.element(a,round(m.ct.x.tab[i,i]/sum(m.ct.x.tab[i,]),4)) + a<-table.row.end(a) + } + a<-table.row.start(a) + a<-table.element(a,'Overall',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.i.cp,4)) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.x.cp,4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/www/rcomp/tmp/1ckzu1323614629.tab") + } + } > m Conditional inference tree with 2 terminal nodes Response: time_in_rfc Inputs: pop, gender, total_tests Number of observations: 67 1) pop <= 0; criterion = 0.973, statistic = 6.797 2)* weights = 46 1) pop > 0 3)* weights = 21 > postscript(file="/var/www/rcomp/tmp/2tonh1323614629.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(m) > dev.off() null device 1 > postscript(file="/var/www/rcomp/tmp/3hvri1323614629.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(x[,par1] ~ as.factor(where(m)),main='Response by Terminal Node',xlab='Terminal Node',ylab='Response') > dev.off() null device 1 > if (par2 == 'none') { + forec <- predict(m) + result <- as.data.frame(cbind(x[,par1],forec,x[,par1]-forec)) + colnames(result) <- c('Actuals','Forecasts','Residuals') + print(result) + } Actuals Forecasts Residuals 1 210907 194610.1 16296.870 2 149061 194610.1 -45549.130 3 237213 194610.1 42602.870 4 133131 194610.1 -61479.130 5 324799 194610.1 130188.870 6 230964 194610.1 36353.870 7 236785 194610.1 42174.870 8 344297 194610.1 149686.870 9 174724 194610.1 -19886.130 10 174415 194610.1 -20195.130 11 223632 194610.1 29021.870 12 294424 194610.1 99813.870 13 325107 194610.1 130496.870 14 106408 194610.1 -88202.130 15 96560 194610.1 -98050.130 16 265769 194610.1 71158.870 17 149112 194610.1 -45498.130 18 152871 194610.1 -41739.130 19 362301 194610.1 167690.870 20 183167 194610.1 -11443.130 21 218946 194610.1 24335.870 22 244052 194610.1 49441.870 23 341570 194610.1 146959.870 24 196553 194610.1 1942.870 25 143246 194610.1 -51364.130 26 143756 194610.1 -50854.130 27 152299 194610.1 -42311.130 28 193339 194610.1 -1271.130 29 130585 194610.1 -64025.130 30 112611 194610.1 -81999.130 31 148446 194610.1 -46164.130 32 182079 194610.1 -12531.130 33 243060 194610.1 48449.870 34 162765 194610.1 -31845.130 35 85574 194610.1 -109036.130 36 225060 194610.1 30449.870 37 133328 194610.1 -61282.130 38 100750 194610.1 -93860.130 39 101523 194610.1 -93087.130 40 243511 194610.1 48900.870 41 152474 194610.1 -42136.130 42 132487 194610.1 -62123.130 43 317394 194610.1 122783.870 44 244749 194610.1 50138.870 45 128423 194610.1 -66187.130 46 97839 194610.1 -96771.130 47 229242 138736.9 90505.143 48 324598 138736.9 185861.143 49 195838 138736.9 57101.143 50 254488 138736.9 115751.143 51 92499 138736.9 -46237.857 52 224330 138736.9 85593.143 53 181633 138736.9 42896.143 54 271856 138736.9 133119.143 55 95227 138736.9 -43509.857 56 98146 138736.9 -40590.857 57 118612 138736.9 -20124.857 58 65475 138736.9 -73261.857 59 108446 138736.9 -30290.857 60 121848 138736.9 -16888.857 61 76302 138736.9 -62434.857 62 98104 138736.9 -40632.857 63 30989 138736.9 -107747.857 64 31774 138736.9 -106962.857 65 150580 138736.9 11843.143 66 59382 138736.9 -79354.857 67 84105 138736.9 -54631.857 > if (par2 != 'none') { + print(cbind(as.factor(x[,par1]),predict(m))) + myt <- table(as.factor(x[,par1]),predict(m)) + print(myt) + } > postscript(file="/var/www/rcomp/tmp/4r34b1323614629.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > if(par2=='none') { + op <- par(mfrow=c(2,2)) + plot(density(result$Actuals),main='Kernel Density Plot of Actuals') + plot(density(result$Residuals),main='Kernel Density Plot of Residuals') + plot(result$Forecasts,result$Actuals,main='Actuals versus Predictions',xlab='Predictions',ylab='Actuals') + plot(density(result$Forecasts),main='Kernel Density Plot of Predictions') + par(op) + } > if(par2!='none') { + plot(myt,main='Confusion Matrix',xlab='Actual',ylab='Predicted') + } > dev.off() null device 1 > if (par2 == 'none') { + detcoef <- cor(result$Forecasts,result$Actuals) + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Goodness of Fit',2,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Correlation',1,TRUE) + a<-table.element(a,round(detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'R-squared',1,TRUE) + a<-table.element(a,round(detcoef*detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'RMSE',1,TRUE) + a<-table.element(a,round(sqrt(mean((result$Residuals)^2)),4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/www/rcomp/tmp/5ki091323614629.tab") + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Actuals, Predictions, and Residuals',4,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'#',header=TRUE) + a<-table.element(a,'Actuals',header=TRUE) + a<-table.element(a,'Forecasts',header=TRUE) + a<-table.element(a,'Residuals',header=TRUE) + a<-table.row.end(a) + for (i in 1:length(result$Actuals)) { + a<-table.row.start(a) + a<-table.element(a,i,header=TRUE) + a<-table.element(a,result$Actuals[i]) + a<-table.element(a,result$Forecasts[i]) + a<-table.element(a,result$Residuals[i]) + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/www/rcomp/tmp/64xpx1323614629.tab") + } > if (par2 != 'none') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Confusion Matrix (predicted in columns / actuals in rows)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + for (i in 1:par3) { + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + } + a<-table.row.end(a) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (j in 1:par3) { + a<-table.element(a,myt[i,j]) + } + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/www/rcomp/tmp/72em21323614629.tab") + } > > try(system("convert tmp/2tonh1323614629.ps tmp/2tonh1323614629.png",intern=TRUE)) character(0) > try(system("convert tmp/3hvri1323614629.ps tmp/3hvri1323614629.png",intern=TRUE)) character(0) > try(system("convert tmp/4r34b1323614629.ps tmp/4r34b1323614629.png",intern=TRUE)) character(0) > > > proc.time() user system elapsed 2.040 0.110 2.131