R version 2.15.2 (2012-10-26) -- "Trick or Treat" Copyright (C) 2012 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i686-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > x <- array(list(132838 + ,312991 + ,5599 + ,47645 + ,15545 + ,575093 + ,30406 + ,129842 + ,301647 + ,5234 + ,45970 + ,15001 + ,557560 + ,29511 + ,129694 + ,305353 + ,5279 + ,48069 + ,14961 + ,564478 + ,30857 + ,130080 + ,313665 + ,5391 + ,53080 + ,15245 + ,580523 + ,33161 + ,131496 + ,322402 + ,5280 + ,57896 + ,15656 + ,596594 + ,34097 + ,131556 + ,318280 + ,5173 + ,54344 + ,15577 + ,586570 + ,32108 + ,128925 + ,292852 + ,4724 + ,40482 + ,14630 + ,536214 + ,24789 + ,127836 + ,287481 + ,4554 + ,37110 + ,14336 + ,523597 + ,23238 + ,129164 + ,295210 + ,4713 + ,39263 + ,14834 + ,536535 + ,23915 + ,129531 + ,295650 + ,4811 + ,38889 + ,14921 + ,536322 + ,23764 + ,128548 + ,292919 + ,4668 + ,39593 + ,14707 + ,532638 + ,23990 + ,127330 + ,290649 + ,4516 + ,39305 + ,14516 + ,528222 + ,24071 + ,123815 + ,281687 + ,4203 + ,40560 + ,14055 + ,516141 + ,24448 + ,124393 + ,270336 + ,4016 + ,38306 + ,13493 + ,501866 + ,23794 + ,123707 + ,271420 + ,3993 + ,40911 + ,13528 + ,506174 + ,25025 + ,123736 + ,278183 + ,3971 + ,44700 + ,13719 + ,517945 + ,26921 + ,124507 + ,284913 + ,3838 + ,50328 + ,14170 + ,533590 + ,28575 + ,125005 + ,283487 + ,3891 + ,47499 + ,14009 + ,528379 + ,27256 + ,121383 + ,256677 + ,3306 + ,34446 + ,13159 + ,477580 + ,21065 + ,121200 + ,252945 + ,3235 + ,31434 + ,12927 + ,469357 + ,20454 + ,125249 + ,264963 + ,3404 + ,34066 + ,13510 + ,490243 + ,21672 + ,125253 + ,265988 + ,3400 + ,35044 + ,13520 + ,492622 + ,22217 + ,127977 + ,274857 + ,3447 + ,37040 + ,14089 + ,507561 + ,23046 + ,128984 + ,279650 + ,3431 + ,38706 + ,14251 + ,516922 + ,24364 + ,126770 + ,276715 + ,3321 + ,40430 + ,13980 + ,514258 + ,25631 + ,126448 + ,273887 + ,3189 + ,39613 + ,13715 + ,509846 + ,25360 + ,127845 + ,282308 + ,3256 + ,44236 + ,14112 + ,527070 + ,27534 + ,128818 + ,289847 + ,3290 + ,47859 + ,14289 + ,541657 + ,29853 + ,132127 + ,301101 + ,3475 + ,53711 + ,15020 + ,564591 + ,31554 + ,132338 + ,297008 + ,3454 + ,50352 + ,14860 + ,555362 + ,29788 + ,126645 + ,268909 + ,2806 + ,36142 + ,13800 + ,498662 + ,22779 + ,130625 + ,278383 + ,2777 + ,34819 + ,14431 + ,511038 + ,22576 + ,133506 + ,286226 + ,2865 + ,37353 + ,14944 + ,525919 + ,23572 + ,135277 + ,288936 + ,2924 + ,37550 + ,15083 + ,531673 + ,24132 + ,137664 + ,298953 + ,3011 + ,40462 + ,15707 + ,548854 + ,25699 + ,139821 + ,305837 + ,3099 + ,41753 + ,15954 + ,560576 + ,26960 + ,138440 + ,301979 + ,2988 + ,43437 + ,15631 + ,557274 + ,28005 + ,139879 + ,306281 + ,3032 + ,44784 + ,15813 + ,565742 + ,29114 + ,142256 + ,317057 + ,3131 + ,49537 + ,16356 + ,587625 + ,31945 + ,146322 + ,334780 + ,3343 + ,54974 + ,17086 + ,619916 + ,35559 + ,146389 + ,335895 + ,3275 + ,58535 + ,17302 + ,625809 + ,36259 + ,147841 + ,333874 + ,3243 + ,54762 + ,17247 + ,619567 + ,34018 + ,146449 + ,311028 + ,2897 + ,40738 + ,16398 + ,572942 + ,26543 + ,147960 + ,311767 + ,2818 + ,38052 + ,16590 + ,572775 + ,25672 + ,148487 + ,312575 + ,2836 + ,38436 + ,16673 + ,574205 + ,25925 + ,149802 + ,315040 + ,2721 + ,36993 + ,16962 + ,579799 + ,28472 + ,151387 + ,320325 + ,2742 + ,39056 + ,17278 + ,590072 + ,29669 + ,151936 + ,321178 + ,2707 + ,39996 + ,17224 + ,593408 + ,30786) + ,dim=c(7 + ,48) + ,dimnames=list(c('Basis' + ,'Secundair' + ,'Duaal' + ,'Hoger(Bachelor)' + ,'Leercontract' + ,'Werkloosheid' + ,'Hoger_onderwijs(Master)') + ,1:48)) > y <- array(NA,dim=c(7,48),dimnames=list(c('Basis','Secundair','Duaal','Hoger(Bachelor)','Leercontract','Werkloosheid','Hoger_onderwijs(Master)'),1:48)) > for (i in 1:dim(x)[1]) + { + for (j in 1:dim(x)[2]) + { + y[i,j] <- as.numeric(x[i,j]) + } + } > par4 = 'no' > par3 = '3' > par2 = 'none' > par1 = '6' > library(party) Loading required package: survival Loading required package: splines Loading required package: grid Loading required package: modeltools Loading required package: stats4 Loading required package: coin Loading required package: mvtnorm Loading required package: zoo Attaching package: 'zoo' The following object(s) are masked from 'package:base': as.Date, as.Date.numeric Loading required package: sandwich Loading required package: strucchange Loading required package: vcd Loading required package: MASS Loading required package: colorspace > library(Hmisc) Hmisc library by Frank E Harrell Jr Type library(help='Hmisc'), ?Overview, or ?Hmisc.Overview') to see overall documentation. NOTE:Hmisc no longer redefines [.factor to drop unused levels when subsetting. To get the old behavior of Hmisc type dropUnusedLevels(). Attaching package: 'Hmisc' The following object(s) are masked from 'package:survival': untangle.specials The following object(s) are masked from 'package:base': format.pval, round.POSIXt, trunc.POSIXt, units > par1 <- as.numeric(par1) > par3 <- as.numeric(par3) > x <- data.frame(t(y)) > is.data.frame(x) [1] TRUE > x <- x[!is.na(x[,par1]),] > k <- length(x[1,]) > n <- length(x[,1]) > colnames(x)[par1] [1] "Werkloosheid" > x[,par1] [1] 575093 557560 564478 580523 596594 586570 536214 523597 536535 536322 [11] 532638 528222 516141 501866 506174 517945 533590 528379 477580 469357 [21] 490243 492622 507561 516922 514258 509846 527070 541657 564591 555362 [31] 498662 511038 525919 531673 548854 560576 557274 565742 587625 619916 [41] 625809 619567 572942 572775 574205 579799 590072 593408 > if (par2 == 'kmeans') { + cl <- kmeans(x[,par1], par3) + print(cl) + clm <- matrix(cbind(cl$centers,1:par3),ncol=2) + clm <- clm[sort.list(clm[,1]),] + for (i in 1:par3) { + cl$cluster[cl$cluster==clm[i,2]] <- paste('C',i,sep='') + } + cl$cluster <- as.factor(cl$cluster) + print(cl$cluster) + x[,par1] <- cl$cluster + } > if (par2 == 'quantiles') { + x[,par1] <- cut2(x[,par1],g=par3) + } > if (par2 == 'hclust') { + hc <- hclust(dist(x[,par1])^2, 'cen') + print(hc) + memb <- cutree(hc, k = par3) + dum <- c(mean(x[memb==1,par1])) + for (i in 2:par3) { + dum <- c(dum, mean(x[memb==i,par1])) + } + hcm <- matrix(cbind(dum,1:par3),ncol=2) + hcm <- hcm[sort.list(hcm[,1]),] + for (i in 1:par3) { + memb[memb==hcm[i,2]] <- paste('C',i,sep='') + } + memb <- as.factor(memb) + print(memb) + x[,par1] <- memb + } > if (par2=='equal') { + ed <- cut(as.numeric(x[,par1]),par3,labels=paste('C',1:par3,sep='')) + x[,par1] <- as.factor(ed) + } > table(x[,par1]) 469357 477580 490243 492622 498662 501866 506174 507561 509846 511038 514258 1 1 1 1 1 1 1 1 1 1 1 516141 516922 517945 523597 525919 527070 528222 528379 531673 532638 533590 1 1 1 1 1 1 1 1 1 1 1 536214 536322 536535 541657 548854 555362 557274 557560 560576 564478 564591 1 1 1 1 1 1 1 1 1 1 1 565742 572775 572942 574205 575093 579799 580523 586570 587625 590072 593408 1 1 1 1 1 1 1 1 1 1 1 596594 619567 619916 625809 1 1 1 1 > colnames(x) [1] "Basis" "Secundair" [3] "Duaal" "Hoger.Bachelor." [5] "Leercontract" "Werkloosheid" [7] "Hoger_onderwijs.Master." > colnames(x)[par1] [1] "Werkloosheid" > x[,par1] [1] 575093 557560 564478 580523 596594 586570 536214 523597 536535 536322 [11] 532638 528222 516141 501866 506174 517945 533590 528379 477580 469357 [21] 490243 492622 507561 516922 514258 509846 527070 541657 564591 555362 [31] 498662 511038 525919 531673 548854 560576 557274 565742 587625 619916 [41] 625809 619567 572942 572775 574205 579799 590072 593408 > if (par2 == 'none') { + m <- ctree(as.formula(paste(colnames(x)[par1],' ~ .',sep='')),data = x) + } > > #Note: the /var/wessaorg/rcomp/createtable file can be downloaded at http://www.wessa.net/cretab > load(file="/var/wessaorg/rcomp/createtable") > > if (par2 != 'none') { + m <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data = x) + if (par4=='yes') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'10-Fold Cross Validation',3+2*par3,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + a<-table.element(a,'Prediction (training)',par3+1,TRUE) + a<-table.element(a,'Prediction (testing)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Actual',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + a<-table.row.end(a) + for (i in 1:10) { + ind <- sample(2, nrow(x), replace=T, prob=c(0.9,0.1)) + m.ct <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data =x[ind==1,]) + if (i==1) { + m.ct.i.pred <- predict(m.ct, newdata=x[ind==1,]) + m.ct.i.actu <- x[ind==1,par1] + m.ct.x.pred <- predict(m.ct, newdata=x[ind==2,]) + m.ct.x.actu <- x[ind==2,par1] + } else { + m.ct.i.pred <- c(m.ct.i.pred,predict(m.ct, newdata=x[ind==1,])) + m.ct.i.actu <- c(m.ct.i.actu,x[ind==1,par1]) + m.ct.x.pred <- c(m.ct.x.pred,predict(m.ct, newdata=x[ind==2,])) + m.ct.x.actu <- c(m.ct.x.actu,x[ind==2,par1]) + } + } + print(m.ct.i.tab <- table(m.ct.i.actu,m.ct.i.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.i.tab[i,i] / sum(m.ct.i.tab[i,])) + numer <- numer + m.ct.i.tab[i,i] + } + print(m.ct.i.cp <- numer / sum(m.ct.i.tab)) + print(m.ct.x.tab <- table(m.ct.x.actu,m.ct.x.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.x.tab[i,i] / sum(m.ct.x.tab[i,])) + numer <- numer + m.ct.x.tab[i,i] + } + print(m.ct.x.cp <- numer / sum(m.ct.x.tab)) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (jjj in 1:par3) a<-table.element(a,m.ct.i.tab[i,jjj]) + a<-table.element(a,round(m.ct.i.tab[i,i]/sum(m.ct.i.tab[i,]),4)) + for (jjj in 1:par3) a<-table.element(a,m.ct.x.tab[i,jjj]) + a<-table.element(a,round(m.ct.x.tab[i,i]/sum(m.ct.x.tab[i,]),4)) + a<-table.row.end(a) + } + a<-table.row.start(a) + a<-table.element(a,'Overall',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.i.cp,4)) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.x.cp,4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/1gzox1354817142.tab") + } + } > m Conditional inference tree with 4 terminal nodes Response: Werkloosheid Inputs: Basis, Secundair, Duaal, Hoger.Bachelor., Leercontract, Hoger_onderwijs.Master. Number of observations: 48 1) Secundair <= 295650; criterion = 1, statistic = 46.371 2) Secundair <= 274857; criterion = 1, statistic = 23.675 3)* weights = 9 2) Secundair > 274857 4)* weights = 17 1) Secundair > 295650 5) Secundair <= 315040; criterion = 1, statistic = 20.506 6)* weights = 14 5) Secundair > 315040 7)* weights = 8 > postscript(file="/var/wessaorg/rcomp/tmp/2x4dl1354817142.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(m) > dev.off() null device 1 > postscript(file="/var/wessaorg/rcomp/tmp/36el11354817142.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(x[,par1] ~ as.factor(where(m)),main='Response by Terminal Node',xlab='Terminal Node',ylab='Response') > dev.off() null device 1 > if (par2 == 'none') { + forec <- predict(m) + result <- as.data.frame(cbind(x[,par1],forec,x[,par1]-forec)) + colnames(result) <- c('Actuals','Forecasts','Residuals') + print(result) + } Actuals Forecasts Residuals 1 575093 566412.4 8680.5714 2 557560 566412.4 -8852.4286 3 564478 566412.4 -1934.4286 4 580523 566412.4 14110.5714 5 596594 602445.1 -5851.1250 6 586570 602445.1 -15875.1250 7 536214 526948.2 9265.7647 8 523597 526948.2 -3351.2353 9 536535 526948.2 9586.7647 10 536322 526948.2 9373.7647 11 532638 526948.2 5689.7647 12 528222 526948.2 1273.7647 13 516141 526948.2 -10807.2353 14 501866 494879.0 6987.0000 15 506174 494879.0 11295.0000 16 517945 526948.2 -9003.2353 17 533590 526948.2 6641.7647 18 528379 526948.2 1430.7647 19 477580 494879.0 -17299.0000 20 469357 494879.0 -25522.0000 21 490243 494879.0 -4636.0000 22 492622 494879.0 -2257.0000 23 507561 494879.0 12682.0000 24 516922 526948.2 -10026.2353 25 514258 526948.2 -12690.2353 26 509846 494879.0 14967.0000 27 527070 526948.2 121.7647 28 541657 526948.2 14708.7647 29 564591 566412.4 -1821.4286 30 555362 566412.4 -11050.4286 31 498662 494879.0 3783.0000 32 511038 526948.2 -15910.2353 33 525919 526948.2 -1029.2353 34 531673 526948.2 4724.7647 35 548854 566412.4 -17558.4286 36 560576 566412.4 -5836.4286 37 557274 566412.4 -9138.4286 38 565742 566412.4 -670.4286 39 587625 602445.1 -14820.1250 40 619916 602445.1 17470.8750 41 625809 602445.1 23363.8750 42 619567 602445.1 17121.8750 43 572942 566412.4 6529.5714 44 572775 566412.4 6362.5714 45 574205 566412.4 7792.5714 46 579799 566412.4 13386.5714 47 590072 602445.1 -12373.1250 48 593408 602445.1 -9037.1250 > if (par2 != 'none') { + print(cbind(as.factor(x[,par1]),predict(m))) + myt <- table(as.factor(x[,par1]),predict(m)) + print(myt) + } > postscript(file="/var/wessaorg/rcomp/tmp/4csgi1354817142.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > if(par2=='none') { + op <- par(mfrow=c(2,2)) + plot(density(result$Actuals),main='Kernel Density Plot of Actuals') + plot(density(result$Residuals),main='Kernel Density Plot of Residuals') + plot(result$Forecasts,result$Actuals,main='Actuals versus Predictions',xlab='Predictions',ylab='Actuals') + plot(density(result$Forecasts),main='Kernel Density Plot of Predictions') + par(op) + } > if(par2!='none') { + plot(myt,main='Confusion Matrix',xlab='Actual',ylab='Predicted') + } > dev.off() null device 1 > if (par2 == 'none') { + detcoef <- cor(result$Forecasts,result$Actuals) + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Goodness of Fit',2,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Correlation',1,TRUE) + a<-table.element(a,round(detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'R-squared',1,TRUE) + a<-table.element(a,round(detcoef*detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'RMSE',1,TRUE) + a<-table.element(a,round(sqrt(mean((result$Residuals)^2)),4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/57tbv1354817142.tab") + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Actuals, Predictions, and Residuals',4,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'#',header=TRUE) + a<-table.element(a,'Actuals',header=TRUE) + a<-table.element(a,'Forecasts',header=TRUE) + a<-table.element(a,'Residuals',header=TRUE) + a<-table.row.end(a) + for (i in 1:length(result$Actuals)) { + a<-table.row.start(a) + a<-table.element(a,i,header=TRUE) + a<-table.element(a,result$Actuals[i]) + a<-table.element(a,result$Forecasts[i]) + a<-table.element(a,result$Residuals[i]) + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/6wtyz1354817142.tab") + } > if (par2 != 'none') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Confusion Matrix (predicted in columns / actuals in rows)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + for (i in 1:par3) { + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + } + a<-table.row.end(a) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (j in 1:par3) { + a<-table.element(a,myt[i,j]) + } + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/752jd1354817142.tab") + } > > try(system("convert tmp/2x4dl1354817142.ps tmp/2x4dl1354817142.png",intern=TRUE)) character(0) > try(system("convert tmp/36el11354817142.ps tmp/36el11354817142.png",intern=TRUE)) character(0) > try(system("convert tmp/4csgi1354817142.ps tmp/4csgi1354817142.png",intern=TRUE)) character(0) > > > proc.time() user system elapsed 5.105 0.485 5.563