R version 2.15.2 (2012-10-26) -- "Trick or Treat" Copyright (C) 2012 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i686-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > x <- array(list(210907 + ,56 + ,396 + ,81 + ,3 + ,79 + ,30 + ,120982 + ,56 + ,297 + ,55 + ,4 + ,58 + ,28 + ,176508 + ,54 + ,559 + ,50 + ,12 + ,60 + ,38 + ,179321 + ,89 + ,967 + ,125 + ,2 + ,108 + ,30 + ,123185 + ,40 + ,270 + ,40 + ,1 + ,49 + ,22 + ,52746 + ,25 + ,143 + ,37 + ,3 + ,0 + ,26 + ,385534 + ,92 + ,1562 + ,63 + ,0 + ,121 + ,25 + ,33170 + ,18 + ,109 + ,44 + ,0 + ,1 + ,18 + ,101645 + ,63 + ,371 + ,88 + ,0 + ,20 + ,11 + ,149061 + ,44 + ,656 + ,66 + ,5 + ,43 + ,26 + ,165446 + ,33 + ,511 + ,57 + ,0 + ,69 + ,25 + ,237213 + ,84 + ,655 + ,74 + ,0 + ,78 + ,38 + ,173326 + ,88 + ,465 + ,49 + ,7 + ,86 + ,44 + ,133131 + ,55 + ,525 + ,52 + ,7 + ,44 + ,30 + ,258873 + ,60 + ,885 + ,88 + ,3 + ,104 + ,40 + ,180083 + ,66 + ,497 + ,36 + ,9 + ,63 + ,34 + ,324799 + ,154 + ,1436 + ,108 + ,0 + ,158 + ,47 + ,230964 + ,53 + ,612 + ,43 + ,4 + ,102 + ,30 + ,236785 + ,119 + ,865 + ,75 + ,3 + ,77 + ,31 + ,135473 + ,41 + ,385 + ,32 + ,0 + ,82 + ,23 + ,202925 + ,61 + ,567 + ,44 + ,7 + ,115 + ,36 + ,215147 + ,58 + ,639 + ,85 + ,0 + ,101 + ,36 + ,344297 + ,75 + ,963 + ,86 + ,1 + ,80 + ,30 + ,153935 + ,33 + ,398 + ,56 + ,5 + ,50 + ,25 + ,132943 + ,40 + ,410 + ,50 + ,7 + ,83 + ,39 + ,174724 + ,92 + ,966 + ,135 + ,0 + ,123 + ,34 + ,174415 + ,100 + ,801 + ,63 + ,0 + ,73 + ,31 + ,225548 + ,112 + ,892 + ,81 + ,5 + ,81 + ,31 + ,223632 + ,73 + ,513 + ,52 + ,0 + ,105 + ,33 + ,124817 + ,40 + ,469 + ,44 + ,0 + ,47 + ,25 + ,221698 + ,45 + ,683 + ,113 + ,0 + ,105 + ,33 + ,210767 + ,60 + ,643 + ,39 + ,3 + ,94 + ,35 + ,170266 + ,62 + ,535 + ,73 + ,4 + ,44 + ,42 + ,260561 + ,75 + ,625 + ,48 + ,1 + ,114 + ,43 + ,84853 + ,31 + ,264 + ,33 + ,4 + ,38 + ,30 + ,294424 + ,77 + ,992 + ,59 + ,2 + ,107 + ,33 + ,101011 + ,34 + ,238 + ,41 + ,0 + ,30 + ,13 + ,215641 + ,46 + ,818 + ,69 + ,0 + ,71 + ,32 + ,325107 + ,99 + ,937 + ,64 + ,0 + ,84 + ,36 + ,7176 + ,17 + ,70 + ,1 + ,0 + ,0 + ,0 + ,167542 + ,66 + ,507 + ,59 + ,2 + ,59 + ,28 + ,106408 + ,30 + ,260 + ,32 + ,1 + ,33 + ,14 + ,96560 + ,76 + ,503 + ,129 + ,0 + ,42 + ,17 + ,265769 + ,146 + ,927 + ,37 + ,2 + ,96 + ,32 + ,269651 + ,67 + ,1269 + ,31 + ,10 + ,106 + ,30 + ,149112 + ,56 + ,537 + ,65 + ,6 + ,56 + ,35 + ,175824 + ,107 + ,910 + ,107 + ,0 + ,57 + ,20 + ,152871 + ,58 + ,532 + ,74 + ,5 + ,59 + ,28 + ,111665 + ,34 + ,345 + ,54 + ,4 + ,39 + ,28 + ,116408 + ,61 + ,918 + ,76 + ,1 + ,34 + ,39 + ,362301 + ,119 + ,1635 + ,715 + ,2 + ,76 + ,34 + ,78800 + ,42 + ,330 + ,57 + ,2 + ,20 + ,26 + ,183167 + ,66 + ,557 + ,66 + ,0 + ,91 + ,39 + ,277965 + ,89 + ,1178 + ,106 + ,8 + ,115 + ,39 + ,150629 + ,44 + ,740 + ,54 + ,3 + ,85 + ,33 + ,168809 + ,66 + ,452 + ,32 + ,0 + ,76 + ,28 + ,24188 + ,24 + ,218 + ,20 + ,0 + ,8 + ,4) + ,dim=c(7 + ,57) + ,dimnames=list(c('time_in_rfc' + ,'logins' + ,'compendium_views_info' + ,'compendium_views_pr' + ,'shared_compendiums' + ,'blogged_computations' + ,'compendiums_reviewed') + ,1:57)) > y <- array(NA,dim=c(7,57),dimnames=list(c('time_in_rfc','logins','compendium_views_info','compendium_views_pr','shared_compendiums','blogged_computations','compendiums_reviewed'),1:57)) > for (i in 1:dim(x)[1]) + { + for (j in 1:dim(x)[2]) + { + y[i,j] <- as.numeric(x[i,j]) + } + } > par4 = 'no' > par3 = '6' > par2 = 'none' > par1 = '1' > library(party) Loading required package: survival Loading required package: splines Loading required package: grid Loading required package: modeltools Loading required package: stats4 Loading required package: coin Loading required package: mvtnorm Loading required package: zoo Attaching package: 'zoo' The following object(s) are masked from 'package:base': as.Date, as.Date.numeric Loading required package: sandwich Loading required package: strucchange Loading required package: vcd Loading required package: MASS Loading required package: colorspace > library(Hmisc) Hmisc library by Frank E Harrell Jr Type library(help='Hmisc'), ?Overview, or ?Hmisc.Overview') to see overall documentation. NOTE:Hmisc no longer redefines [.factor to drop unused levels when subsetting. To get the old behavior of Hmisc type dropUnusedLevels(). Attaching package: 'Hmisc' The following object(s) are masked from 'package:survival': untangle.specials The following object(s) are masked from 'package:base': format.pval, round.POSIXt, trunc.POSIXt, units > par1 <- as.numeric(par1) > par3 <- as.numeric(par3) > x <- data.frame(t(y)) > is.data.frame(x) [1] TRUE > x <- x[!is.na(x[,par1]),] > k <- length(x[1,]) > n <- length(x[,1]) > colnames(x)[par1] [1] "time_in_rfc" > x[,par1] [1] 210907 120982 176508 179321 123185 52746 385534 33170 101645 149061 [11] 165446 237213 173326 133131 258873 180083 324799 230964 236785 135473 [21] 202925 215147 344297 153935 132943 174724 174415 225548 223632 124817 [31] 221698 210767 170266 260561 84853 294424 101011 215641 325107 7176 [41] 167542 106408 96560 265769 269651 149112 175824 152871 111665 116408 [51] 362301 78800 183167 277965 150629 168809 24188 > if (par2 == 'kmeans') { + cl <- kmeans(x[,par1], par3) + print(cl) + clm <- matrix(cbind(cl$centers,1:par3),ncol=2) + clm <- clm[sort.list(clm[,1]),] + for (i in 1:par3) { + cl$cluster[cl$cluster==clm[i,2]] <- paste('C',i,sep='') + } + cl$cluster <- as.factor(cl$cluster) + print(cl$cluster) + x[,par1] <- cl$cluster + } > if (par2 == 'quantiles') { + x[,par1] <- cut2(x[,par1],g=par3) + } > if (par2 == 'hclust') { + hc <- hclust(dist(x[,par1])^2, 'cen') + print(hc) + memb <- cutree(hc, k = par3) + dum <- c(mean(x[memb==1,par1])) + for (i in 2:par3) { + dum <- c(dum, mean(x[memb==i,par1])) + } + hcm <- matrix(cbind(dum,1:par3),ncol=2) + hcm <- hcm[sort.list(hcm[,1]),] + for (i in 1:par3) { + memb[memb==hcm[i,2]] <- paste('C',i,sep='') + } + memb <- as.factor(memb) + print(memb) + x[,par1] <- memb + } > if (par2=='equal') { + ed <- cut(as.numeric(x[,par1]),par3,labels=paste('C',1:par3,sep='')) + x[,par1] <- as.factor(ed) + } > table(x[,par1]) 7176 24188 33170 52746 78800 84853 96560 101011 101645 106408 111665 1 1 1 1 1 1 1 1 1 1 1 116408 120982 123185 124817 132943 133131 135473 149061 149112 150629 152871 1 1 1 1 1 1 1 1 1 1 1 153935 165446 167542 168809 170266 173326 174415 174724 175824 176508 179321 1 1 1 1 1 1 1 1 1 1 1 180083 183167 202925 210767 210907 215147 215641 221698 223632 225548 230964 1 1 1 1 1 1 1 1 1 1 1 236785 237213 258873 260561 265769 269651 277965 294424 324799 325107 344297 1 1 1 1 1 1 1 1 1 1 1 362301 385534 1 1 > colnames(x) [1] "time_in_rfc" "logins" "compendium_views_info" [4] "compendium_views_pr" "shared_compendiums" "blogged_computations" [7] "compendiums_reviewed" > colnames(x)[par1] [1] "time_in_rfc" > x[,par1] [1] 210907 120982 176508 179321 123185 52746 385534 33170 101645 149061 [11] 165446 237213 173326 133131 258873 180083 324799 230964 236785 135473 [21] 202925 215147 344297 153935 132943 174724 174415 225548 223632 124817 [31] 221698 210767 170266 260561 84853 294424 101011 215641 325107 7176 [41] 167542 106408 96560 265769 269651 149112 175824 152871 111665 116408 [51] 362301 78800 183167 277965 150629 168809 24188 > if (par2 == 'none') { + m <- ctree(as.formula(paste(colnames(x)[par1],' ~ .',sep='')),data = x) + } > > #Note: the /var/wessaorg/rcomp/createtable file can be downloaded at http://www.wessa.net/cretab > load(file="/var/wessaorg/rcomp/createtable") > > if (par2 != 'none') { + m <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data = x) + if (par4=='yes') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'10-Fold Cross Validation',3+2*par3,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + a<-table.element(a,'Prediction (training)',par3+1,TRUE) + a<-table.element(a,'Prediction (testing)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Actual',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + a<-table.row.end(a) + for (i in 1:10) { + ind <- sample(2, nrow(x), replace=T, prob=c(0.9,0.1)) + m.ct <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data =x[ind==1,]) + if (i==1) { + m.ct.i.pred <- predict(m.ct, newdata=x[ind==1,]) + m.ct.i.actu <- x[ind==1,par1] + m.ct.x.pred <- predict(m.ct, newdata=x[ind==2,]) + m.ct.x.actu <- x[ind==2,par1] + } else { + m.ct.i.pred <- c(m.ct.i.pred,predict(m.ct, newdata=x[ind==1,])) + m.ct.i.actu <- c(m.ct.i.actu,x[ind==1,par1]) + m.ct.x.pred <- c(m.ct.x.pred,predict(m.ct, newdata=x[ind==2,])) + m.ct.x.actu <- c(m.ct.x.actu,x[ind==2,par1]) + } + } + print(m.ct.i.tab <- table(m.ct.i.actu,m.ct.i.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.i.tab[i,i] / sum(m.ct.i.tab[i,])) + numer <- numer + m.ct.i.tab[i,i] + } + print(m.ct.i.cp <- numer / sum(m.ct.i.tab)) + print(m.ct.x.tab <- table(m.ct.x.actu,m.ct.x.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.x.tab[i,i] / sum(m.ct.x.tab[i,])) + numer <- numer + m.ct.x.tab[i,i] + } + print(m.ct.x.cp <- numer / sum(m.ct.x.tab)) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (jjj in 1:par3) a<-table.element(a,m.ct.i.tab[i,jjj]) + a<-table.element(a,round(m.ct.i.tab[i,i]/sum(m.ct.i.tab[i,]),4)) + for (jjj in 1:par3) a<-table.element(a,m.ct.x.tab[i,jjj]) + a<-table.element(a,round(m.ct.x.tab[i,i]/sum(m.ct.x.tab[i,]),4)) + a<-table.row.end(a) + } + a<-table.row.start(a) + a<-table.element(a,'Overall',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.i.cp,4)) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.x.cp,4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/1zopd1355156448.tab") + } + } > m Conditional inference tree with 4 terminal nodes Response: time_in_rfc Inputs: logins, compendium_views_info, compendium_views_pr, shared_compendiums, blogged_computations, compendiums_reviewed Number of observations: 57 1) compendium_views_info <= 559; criterion = 1, statistic = 40.372 2) blogged_computations <= 42; criterion = 1, statistic = 22.865 3)* weights = 11 2) blogged_computations > 42 4)* weights = 19 1) compendium_views_info > 559 5) compendium_views_info <= 918; criterion = 0.995, statistic = 11.179 6)* weights = 16 5) compendium_views_info > 918 7)* weights = 11 > postscript(file="/var/wessaorg/rcomp/tmp/2bblp1355156448.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(m) > dev.off() null device 1 > postscript(file="/var/wessaorg/rcomp/tmp/3xgja1355156448.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(x[,par1] ~ as.factor(where(m)),main='Response by Terminal Node',xlab='Terminal Node',ylab='Response') > dev.off() null device 1 > if (par2 == 'none') { + forec <- predict(m) + result <- as.data.frame(cbind(x[,par1],forec,x[,par1]-forec)) + colnames(result) <- c('Actuals','Forecasts','Residuals') + print(result) + } Actuals Forecasts Residuals 1 210907 160322.89 50584.105 2 120982 160322.89 -39340.895 3 176508 160322.89 16185.105 4 179321 291262.91 -111941.909 5 123185 160322.89 -37137.895 6 52746 72565.64 -19819.636 7 385534 291262.91 94271.091 8 33170 72565.64 -39395.636 9 101645 72565.64 29079.364 10 149061 205153.69 -56092.688 11 165446 160322.89 5123.105 12 237213 205153.69 32059.312 13 173326 160322.89 13003.105 14 133131 160322.89 -27191.895 15 258873 205153.69 53719.312 16 180083 160322.89 19760.105 17 324799 291262.91 33536.091 18 230964 205153.69 25810.312 19 236785 205153.69 31631.312 20 135473 160322.89 -24849.895 21 202925 205153.69 -2228.688 22 215147 205153.69 9993.312 23 344297 291262.91 53034.091 24 153935 160322.89 -6387.895 25 132943 160322.89 -27379.895 26 174724 291262.91 -116538.909 27 174415 205153.69 -30738.688 28 225548 205153.69 20394.312 29 223632 160322.89 63309.105 30 124817 160322.89 -35505.895 31 221698 205153.69 16544.312 32 210767 205153.69 5613.312 33 170266 160322.89 9943.105 34 260561 205153.69 55407.312 35 84853 72565.64 12287.364 36 294424 291262.91 3161.091 37 101011 72565.64 28445.364 38 215641 205153.69 10487.312 39 325107 291262.91 33844.091 40 7176 72565.64 -65389.636 41 167542 160322.89 7219.105 42 106408 72565.64 33842.364 43 96560 72565.64 23994.364 44 265769 291262.91 -25493.909 45 269651 291262.91 -21611.909 46 149112 160322.89 -11210.895 47 175824 205153.69 -29329.688 48 152871 160322.89 -7451.895 49 111665 72565.64 39099.364 50 116408 205153.69 -88745.688 51 362301 291262.91 71038.091 52 78800 72565.64 6234.364 53 183167 160322.89 22844.105 54 277965 291262.91 -13297.909 55 150629 205153.69 -54524.688 56 168809 160322.89 8486.105 57 24188 72565.64 -48377.636 > if (par2 != 'none') { + print(cbind(as.factor(x[,par1]),predict(m))) + myt <- table(as.factor(x[,par1]),predict(m)) + print(myt) + } > postscript(file="/var/wessaorg/rcomp/tmp/408ft1355156448.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > if(par2=='none') { + op <- par(mfrow=c(2,2)) + plot(density(result$Actuals),main='Kernel Density Plot of Actuals') + plot(density(result$Residuals),main='Kernel Density Plot of Residuals') + plot(result$Forecasts,result$Actuals,main='Actuals versus Predictions',xlab='Predictions',ylab='Actuals') + plot(density(result$Forecasts),main='Kernel Density Plot of Predictions') + par(op) + } > if(par2!='none') { + plot(myt,main='Confusion Matrix',xlab='Actual',ylab='Predicted') + } > dev.off() null device 1 > if (par2 == 'none') { + detcoef <- cor(result$Forecasts,result$Actuals) + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Goodness of Fit',2,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Correlation',1,TRUE) + a<-table.element(a,round(detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'R-squared',1,TRUE) + a<-table.element(a,round(detcoef*detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'RMSE',1,TRUE) + a<-table.element(a,round(sqrt(mean((result$Residuals)^2)),4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/5x7vx1355156448.tab") + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Actuals, Predictions, and Residuals',4,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'#',header=TRUE) + a<-table.element(a,'Actuals',header=TRUE) + a<-table.element(a,'Forecasts',header=TRUE) + a<-table.element(a,'Residuals',header=TRUE) + a<-table.row.end(a) + for (i in 1:length(result$Actuals)) { + a<-table.row.start(a) + a<-table.element(a,i,header=TRUE) + a<-table.element(a,result$Actuals[i]) + a<-table.element(a,result$Forecasts[i]) + a<-table.element(a,result$Residuals[i]) + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/6rn8w1355156448.tab") + } > if (par2 != 'none') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Confusion Matrix (predicted in columns / actuals in rows)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + for (i in 1:par3) { + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + } + a<-table.row.end(a) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (j in 1:par3) { + a<-table.element(a,myt[i,j]) + } + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/75xdj1355156448.tab") + } > > try(system("convert tmp/2bblp1355156448.ps tmp/2bblp1355156448.png",intern=TRUE)) character(0) > try(system("convert tmp/3xgja1355156448.ps tmp/3xgja1355156448.png",intern=TRUE)) character(0) > try(system("convert tmp/408ft1355156448.ps tmp/408ft1355156448.png",intern=TRUE)) character(0) > > > proc.time() user system elapsed 3.996 0.370 4.346