R version 2.13.0 (2011-04-13) Copyright (C) 2011 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i486-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > x <- array(list(2 + ,210907 + ,56 + ,396 + ,79 + ,30 + ,1 + ,0 + ,0 + ,149061 + ,44 + ,656 + ,43 + ,26 + ,1 + ,0 + ,0 + ,237213 + ,84 + ,655 + ,78 + ,38 + ,1 + ,1 + ,4 + ,133131 + ,55 + ,525 + ,44 + ,30 + ,1 + ,1 + ,0 + ,324799 + ,154 + ,1436 + ,158 + ,47 + ,1 + ,1 + ,0 + ,230964 + ,53 + ,612 + ,102 + ,30 + ,1 + ,0 + ,0 + ,236785 + ,119 + ,865 + ,77 + ,31 + ,1 + ,1 + ,1 + ,344297 + ,75 + ,963 + ,80 + ,30 + ,1 + ,1 + ,0 + ,174724 + ,92 + ,966 + ,123 + ,34 + ,1 + ,1 + ,3 + ,174415 + ,100 + ,801 + ,73 + ,31 + ,1 + ,1 + ,0 + ,223632 + ,73 + ,513 + ,105 + ,33 + ,1 + ,1 + ,4 + ,294424 + ,77 + ,992 + ,107 + ,33 + ,1 + ,0 + ,1 + ,106408 + ,30 + ,260 + ,33 + ,14 + ,1 + ,0 + ,0 + ,96560 + ,76 + ,503 + ,42 + ,17 + ,0 + ,0 + ,0 + ,265769 + ,146 + ,927 + ,96 + ,32 + ,1 + ,1 + ,0 + ,149112 + ,56 + ,537 + ,56 + ,35 + ,1 + ,0 + ,2 + ,152871 + ,58 + ,532 + ,59 + ,28 + ,1 + ,0 + ,2 + ,362301 + ,119 + ,1635 + ,76 + ,34 + ,1 + ,1 + ,0 + ,183167 + ,66 + ,557 + ,91 + ,39 + ,1 + ,0 + ,2 + ,218946 + ,41 + ,866 + ,76 + ,29 + ,1 + ,1 + ,2 + ,244052 + ,68 + ,574 + ,101 + ,44 + ,1 + ,1 + ,0 + ,341570 + ,168 + ,1276 + ,94 + ,21 + ,0 + ,1 + ,0 + ,196553 + ,57 + ,503 + ,41 + ,29 + ,1 + ,1 + ,2 + ,143246 + ,103 + ,464 + ,67 + ,27 + ,1 + ,0 + ,0 + ,167488 + ,45 + ,619 + ,69 + ,28 + ,1 + ,0 + ,4 + ,143756 + ,46 + ,479 + ,105 + ,34 + ,1 + ,0 + ,2 + ,152299 + ,53 + ,537 + ,62 + ,33 + ,1 + ,1 + ,2 + ,193339 + ,78 + ,465 + ,100 + ,35 + ,1 + ,1 + ,0 + ,130585 + ,46 + ,299 + ,67 + ,29 + ,1 + ,0 + ,3 + ,112611 + ,41 + ,248 + ,46 + ,20 + ,0 + ,1 + ,3 + ,148446 + ,91 + ,905 + ,135 + ,37 + ,1 + ,1 + ,2 + ,182079 + ,63 + ,512 + ,124 + ,33 + ,1 + ,0 + ,0 + ,243060 + ,63 + ,786 + ,58 + ,29 + ,1 + ,1 + ,0 + ,162765 + ,32 + ,489 + ,68 + ,28 + ,1 + ,1 + ,0 + ,85574 + ,34 + ,351 + ,37 + ,21 + ,0 + ,1 + ,1 + ,225060 + ,93 + ,669 + ,93 + ,41 + ,1 + ,0 + ,0 + ,133328 + ,55 + ,506 + ,56 + ,20 + ,0 + ,1 + ,3 + ,100750 + ,72 + ,407 + ,83 + ,30 + ,1 + ,1 + ,0 + ,101523 + ,42 + ,316 + ,59 + ,22 + ,0 + ,1 + ,0 + ,243511 + ,71 + ,603 + ,133 + ,42 + ,1 + ,1 + ,0 + ,152474 + ,65 + ,577 + ,106 + ,32 + ,1 + ,1 + ,3 + ,132487 + ,41 + ,411 + ,71 + ,36 + ,1 + ,1 + ,0 + ,317394 + ,86 + ,975 + ,116 + ,31 + ,1 + ,0 + ,0 + ,244749 + ,95 + ,964 + ,98 + ,33 + ,1 + ,1 + ,2 + ,128423 + ,64 + ,369 + ,32 + ,38 + ,1 + ,0 + ,0 + ,97839 + ,38 + ,417 + ,25 + ,24 + ,1 + ,0 + ,2 + ,229242 + ,247 + ,719 + ,63 + ,31 + ,1 + ,1 + ,2 + ,324598 + ,110 + ,1402 + ,113 + ,37 + ,1 + ,0 + ,0 + ,195838 + ,67 + ,564 + ,111 + ,31 + ,1 + ,0 + ,0 + ,254488 + ,83 + ,747 + ,120 + ,39 + ,1 + ,0 + ,0 + ,92499 + ,32 + ,319 + ,25 + ,18 + ,0 + ,1 + ,0 + ,224330 + ,83 + ,612 + ,131 + ,39 + ,1 + ,0 + ,6 + ,181633 + ,70 + ,564 + ,47 + ,30 + ,1 + ,1 + ,0 + ,271856 + ,103 + ,824 + ,109 + ,37 + ,1 + ,1 + ,3 + ,95227 + ,34 + ,239 + ,37 + ,32 + ,1 + ,1 + ,0 + ,98146 + ,40 + ,459 + ,15 + ,17 + ,0 + ,0 + ,0 + ,118612 + ,46 + ,454 + ,54 + ,12 + ,0 + ,0 + ,1 + ,65475 + ,18 + ,225 + ,16 + ,13 + ,0 + ,1 + ,0 + ,108446 + ,60 + ,389 + ,22 + ,17 + ,0 + ,0 + ,2 + ,121848 + ,39 + ,339 + ,37 + ,17 + ,0 + ,0 + ,2 + ,76302 + ,31 + ,333 + ,29 + ,20 + ,0 + ,1 + ,0 + ,98104 + ,54 + ,636 + ,55 + ,17 + ,0 + ,0 + ,0 + ,30989 + ,14 + ,93 + ,5 + ,17 + ,0 + ,1 + ,1 + ,31774 + ,23 + ,170 + ,0 + ,17 + ,0 + ,0 + ,0 + ,150580 + ,77 + ,530 + ,27 + ,22 + ,0 + ,1 + ,1 + ,59382 + ,49 + ,227 + ,29 + ,12 + ,0 + ,0 + ,0 + ,84105 + ,20 + ,261 + ,17 + ,17 + ,0 + ,0) + ,dim=c(8 + ,67) + ,dimnames=list(c('Testscore' + ,'RFC_time' + ,'Logins' + ,'Cviews' + ,'Bcomp' + ,'CompRev' + ,'CourseId' + ,'Geslacht') + ,1:67)) > y <- array(NA,dim=c(8,67),dimnames=list(c('Testscore','RFC_time','Logins','Cviews','Bcomp','CompRev','CourseId','Geslacht'),1:67)) > for (i in 1:dim(x)[1]) + { + for (j in 1:dim(x)[2]) + { + y[i,j] <- as.numeric(x[i,j]) + } + } > par4 = 'no' > par3 = '3' > par2 = 'none' > par1 = '2' > #'GNU S' R Code compiled by R2WASP v. 1.0.44 () > #Author: Dr. Ian E. Holliday > #To cite this work: Ian E. Holliday, 2009, YOUR SOFTWARE TITLE (vNUMBER) in Free Statistics Software (v$_version), Office for Research Development and Education, URL http://www.wessa.net/rwasp_YOURPAGE.wasp/ > #Source of accompanying publication: > #Technical description: > library(party) Loading required package: survival Loading required package: splines Loading required package: grid Loading required package: modeltools Loading required package: stats4 Loading required package: coin Loading required package: mvtnorm Loading required package: zoo Loading required package: sandwich Loading required package: strucchange Loading required package: vcd Loading required package: MASS Loading required package: colorspace > library(Hmisc) Attaching package: 'Hmisc' The following object(s) are masked from 'package:survival': untangle.specials The following object(s) are masked from 'package:base': format.pval, round.POSIXt, trunc.POSIXt, units > par1 <- as.numeric(par1) > par3 <- as.numeric(par3) > x <- data.frame(t(y)) > is.data.frame(x) [1] TRUE > x <- x[!is.na(x[,par1]),] > k <- length(x[1,]) > n <- length(x[,1]) > colnames(x)[par1] [1] "RFC_time" > x[,par1] [1] 210907 149061 237213 133131 324799 230964 236785 344297 174724 174415 [11] 223632 294424 106408 96560 265769 149112 152871 362301 183167 218946 [21] 244052 341570 196553 143246 167488 143756 152299 193339 130585 112611 [31] 148446 182079 243060 162765 85574 225060 133328 100750 101523 243511 [41] 152474 132487 317394 244749 128423 97839 229242 324598 195838 254488 [51] 92499 224330 181633 271856 95227 98146 118612 65475 108446 121848 [61] 76302 98104 30989 31774 150580 59382 84105 > if (par2 == 'kmeans') { + cl <- kmeans(x[,par1], par3) + print(cl) + clm <- matrix(cbind(cl$centers,1:par3),ncol=2) + clm <- clm[sort.list(clm[,1]),] + for (i in 1:par3) { + cl$cluster[cl$cluster==clm[i,2]] <- paste('C',i,sep='') + } + cl$cluster <- as.factor(cl$cluster) + print(cl$cluster) + x[,par1] <- cl$cluster + } > if (par2 == 'quantiles') { + x[,par1] <- cut2(x[,par1],g=par3) + } > if (par2 == 'hclust') { + hc <- hclust(dist(x[,par1])^2, 'cen') + print(hc) + memb <- cutree(hc, k = par3) + dum <- c(mean(x[memb==1,par1])) + for (i in 2:par3) { + dum <- c(dum, mean(x[memb==i,par1])) + } + hcm <- matrix(cbind(dum,1:par3),ncol=2) + hcm <- hcm[sort.list(hcm[,1]),] + for (i in 1:par3) { + memb[memb==hcm[i,2]] <- paste('C',i,sep='') + } + memb <- as.factor(memb) + print(memb) + x[,par1] <- memb + } > if (par2=='equal') { + ed <- cut(as.numeric(x[,par1]),par3,labels=paste('C',1:par3,sep='')) + x[,par1] <- as.factor(ed) + } > table(x[,par1]) 30989 31774 59382 65475 76302 84105 85574 92499 95227 96560 97839 1 1 1 1 1 1 1 1 1 1 1 98104 98146 100750 101523 106408 108446 112611 118612 121848 128423 130585 1 1 1 1 1 1 1 1 1 1 1 132487 133131 133328 143246 143756 148446 149061 149112 150580 152299 152474 1 1 1 1 1 1 1 1 1 1 1 152871 162765 167488 174415 174724 181633 182079 183167 193339 195838 196553 1 1 1 1 1 1 1 1 1 1 1 210907 218946 223632 224330 225060 229242 230964 236785 237213 243060 243511 1 1 1 1 1 1 1 1 1 1 1 244052 244749 254488 265769 271856 294424 317394 324598 324799 341570 344297 1 1 1 1 1 1 1 1 1 1 1 362301 1 > colnames(x) [1] "Testscore" "RFC_time" "Logins" "Cviews" "Bcomp" "CompRev" [7] "CourseId" "Geslacht" > colnames(x)[par1] [1] "RFC_time" > x[,par1] [1] 210907 149061 237213 133131 324799 230964 236785 344297 174724 174415 [11] 223632 294424 106408 96560 265769 149112 152871 362301 183167 218946 [21] 244052 341570 196553 143246 167488 143756 152299 193339 130585 112611 [31] 148446 182079 243060 162765 85574 225060 133328 100750 101523 243511 [41] 152474 132487 317394 244749 128423 97839 229242 324598 195838 254488 [51] 92499 224330 181633 271856 95227 98146 118612 65475 108446 121848 [61] 76302 98104 30989 31774 150580 59382 84105 > if (par2 == 'none') { + m <- ctree(as.formula(paste(colnames(x)[par1],' ~ .',sep='')),data = x) + } > > #Note: the /var/wessaorg/rcomp/createtable file can be downloaded at http://www.wessa.net/cretab > load(file="/var/wessaorg/rcomp/createtable") > > if (par2 != 'none') { + m <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data = x) + if (par4=='yes') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'10-Fold Cross Validation',3+2*par3,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + a<-table.element(a,'Prediction (training)',par3+1,TRUE) + a<-table.element(a,'Prediction (testing)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Actual',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + a<-table.row.end(a) + for (i in 1:10) { + ind <- sample(2, nrow(x), replace=T, prob=c(0.9,0.1)) + m.ct <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data =x[ind==1,]) + if (i==1) { + m.ct.i.pred <- predict(m.ct, newdata=x[ind==1,]) + m.ct.i.actu <- x[ind==1,par1] + m.ct.x.pred <- predict(m.ct, newdata=x[ind==2,]) + m.ct.x.actu <- x[ind==2,par1] + } else { + m.ct.i.pred <- c(m.ct.i.pred,predict(m.ct, newdata=x[ind==1,])) + m.ct.i.actu <- c(m.ct.i.actu,x[ind==1,par1]) + m.ct.x.pred <- c(m.ct.x.pred,predict(m.ct, newdata=x[ind==2,])) + m.ct.x.actu <- c(m.ct.x.actu,x[ind==2,par1]) + } + } + print(m.ct.i.tab <- table(m.ct.i.actu,m.ct.i.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.i.tab[i,i] / sum(m.ct.i.tab[i,])) + numer <- numer + m.ct.i.tab[i,i] + } + print(m.ct.i.cp <- numer / sum(m.ct.i.tab)) + print(m.ct.x.tab <- table(m.ct.x.actu,m.ct.x.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.x.tab[i,i] / sum(m.ct.x.tab[i,])) + numer <- numer + m.ct.x.tab[i,i] + } + print(m.ct.x.cp <- numer / sum(m.ct.x.tab)) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (jjj in 1:par3) a<-table.element(a,m.ct.i.tab[i,jjj]) + a<-table.element(a,round(m.ct.i.tab[i,i]/sum(m.ct.i.tab[i,]),4)) + for (jjj in 1:par3) a<-table.element(a,m.ct.x.tab[i,jjj]) + a<-table.element(a,round(m.ct.x.tab[i,i]/sum(m.ct.x.tab[i,]),4)) + a<-table.row.end(a) + } + a<-table.row.start(a) + a<-table.element(a,'Overall',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.i.cp,4)) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.x.cp,4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/1tqlz1324433493.tab") + } + } > m Conditional inference tree with 5 terminal nodes Response: RFC_time Inputs: Testscore, Logins, Cviews, Bcomp, CompRev, CourseId, Geslacht Number of observations: 67 1) Cviews <= 537; criterion = 1, statistic = 49.171 2) Bcomp <= 37; criterion = 1, statistic = 19.806 3)* weights = 16 2) Bcomp > 37 4)* weights = 20 1) Cviews > 537 5) Cviews <= 905; criterion = 0.999, statistic = 15.637 6) CompRev <= 32; criterion = 0.959, statistic = 7.55 7)* weights = 12 6) CompRev > 32 8)* weights = 9 5) Cviews > 905 9)* weights = 10 > postscript(file="/var/wessaorg/rcomp/tmp/2t75g1324433493.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(m) > dev.off() null device 1 > postscript(file="/var/wessaorg/rcomp/tmp/3inzb1324433493.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(x[,par1] ~ as.factor(where(m)),main='Response by Terminal Node',xlab='Terminal Node',ylab='Response') > dev.off() null device 1 > if (par2 == 'none') { + forec <- predict(m) + result <- as.data.frame(cbind(x[,par1],forec,x[,par1]-forec)) + colnames(result) <- c('Actuals','Forecasts','Residuals') + print(result) + } Actuals Forecasts Residuals 1 210907 148507.30 62399.7000 2 149061 189834.17 -40773.1667 3 237213 225791.44 11421.5556 4 133131 148507.30 -15376.3000 5 324799 299462.50 25336.5000 6 230964 189834.17 41129.8333 7 236785 189834.17 46950.8333 8 344297 299462.50 44834.5000 9 174724 299462.50 -124738.5000 10 174415 189834.17 -15419.1667 11 223632 148507.30 75124.7000 12 294424 299462.50 -5038.5000 13 106408 89563.56 16844.4375 14 96560 148507.30 -51947.3000 15 265769 299462.50 -33693.5000 16 149112 148507.30 604.7000 17 152871 148507.30 4363.7000 18 362301 299462.50 62838.5000 19 183167 225791.44 -42624.4444 20 218946 189834.17 29111.8333 21 244052 225791.44 18260.5556 22 341570 299462.50 42107.5000 23 196553 148507.30 48045.7000 24 143246 148507.30 -5261.3000 25 167488 189834.17 -22346.1667 26 143756 148507.30 -4751.3000 27 152299 148507.30 3791.7000 28 193339 148507.30 44831.7000 29 130585 148507.30 -17922.3000 30 112611 148507.30 -35896.3000 31 148446 225791.44 -77345.4444 32 182079 148507.30 33571.7000 33 243060 189834.17 53225.8333 34 162765 148507.30 14257.7000 35 85574 89563.56 -3989.5625 36 225060 225791.44 -731.4444 37 133328 148507.30 -15179.3000 38 100750 148507.30 -47757.3000 39 101523 148507.30 -46984.3000 40 243511 225791.44 17719.5556 41 152474 189834.17 -37360.1667 42 132487 148507.30 -16020.3000 43 317394 299462.50 17931.5000 44 244749 299462.50 -54713.5000 45 128423 89563.56 38859.4375 46 97839 89563.56 8275.4375 47 229242 189834.17 39407.8333 48 324598 299462.50 25135.5000 49 195838 189834.17 6003.8333 50 254488 225791.44 28696.5556 51 92499 89563.56 2935.4375 52 224330 225791.44 -1461.4444 53 181633 189834.17 -8201.1667 54 271856 225791.44 46064.5556 55 95227 89563.56 5663.4375 56 98146 89563.56 8582.4375 57 118612 148507.30 -29895.3000 58 65475 89563.56 -24088.5625 59 108446 89563.56 18882.4375 60 121848 89563.56 32284.4375 61 76302 89563.56 -13261.5625 62 98104 189834.17 -91730.1667 63 30989 89563.56 -58574.5625 64 31774 89563.56 -57789.5625 65 150580 89563.56 61016.4375 66 59382 89563.56 -30181.5625 67 84105 89563.56 -5458.5625 > if (par2 != 'none') { + print(cbind(as.factor(x[,par1]),predict(m))) + myt <- table(as.factor(x[,par1]),predict(m)) + print(myt) + } > postscript(file="/var/wessaorg/rcomp/tmp/48fvq1324433493.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > if(par2=='none') { + op <- par(mfrow=c(2,2)) + plot(density(result$Actuals),main='Kernel Density Plot of Actuals') + plot(density(result$Residuals),main='Kernel Density Plot of Residuals') + plot(result$Forecasts,result$Actuals,main='Actuals versus Predictions',xlab='Predictions',ylab='Actuals') + plot(density(result$Forecasts),main='Kernel Density Plot of Predictions') + par(op) + } > if(par2!='none') { + plot(myt,main='Confusion Matrix',xlab='Actual',ylab='Predicted') + } > dev.off() null device 1 > if (par2 == 'none') { + detcoef <- cor(result$Forecasts,result$Actuals) + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Goodness of Fit',2,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Correlation',1,TRUE) + a<-table.element(a,round(detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'R-squared',1,TRUE) + a<-table.element(a,round(detcoef*detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'RMSE',1,TRUE) + a<-table.element(a,round(sqrt(mean((result$Residuals)^2)),4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/5gda31324433493.tab") + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Actuals, Predictions, and Residuals',4,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'#',header=TRUE) + a<-table.element(a,'Actuals',header=TRUE) + a<-table.element(a,'Forecasts',header=TRUE) + a<-table.element(a,'Residuals',header=TRUE) + a<-table.row.end(a) + for (i in 1:length(result$Actuals)) { + a<-table.row.start(a) + a<-table.element(a,i,header=TRUE) + a<-table.element(a,result$Actuals[i]) + a<-table.element(a,result$Forecasts[i]) + a<-table.element(a,result$Residuals[i]) + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/6q5691324433493.tab") + } > if (par2 != 'none') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Confusion Matrix (predicted in columns / actuals in rows)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + for (i in 1:par3) { + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + } + a<-table.row.end(a) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (j in 1:par3) { + a<-table.element(a,myt[i,j]) + } + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/70ys91324433493.tab") + } > > try(system("convert tmp/2t75g1324433493.ps tmp/2t75g1324433493.png",intern=TRUE)) character(0) > try(system("convert tmp/3inzb1324433493.ps tmp/3inzb1324433493.png",intern=TRUE)) character(0) > try(system("convert tmp/48fvq1324433493.ps tmp/48fvq1324433493.png",intern=TRUE)) character(0) > > > proc.time() user system elapsed 2.647 0.219 2.865