R version 2.15.2 (2012-10-26) -- "Trick or Treat" Copyright (C) 2012 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i686-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > x <- array(list(210907 + ,56 + ,396 + ,81 + ,3 + ,79 + ,30 + ,120982 + ,56 + ,297 + ,55 + ,4 + ,58 + ,28 + ,176508 + ,54 + ,559 + ,50 + ,12 + ,60 + ,38 + ,179321 + ,89 + ,967 + ,125 + ,2 + ,108 + ,30 + ,123185 + ,40 + ,270 + ,40 + ,1 + ,49 + ,22 + ,52746 + ,25 + ,143 + ,37 + ,3 + ,0 + ,26 + ,385534 + ,92 + ,1562 + ,63 + ,0 + ,121 + ,25 + ,33170 + ,18 + ,109 + ,44 + ,0 + ,1 + ,18 + ,101645 + ,63 + ,371 + ,88 + ,0 + ,20 + ,11 + ,149061 + ,44 + ,656 + ,66 + ,5 + ,43 + ,26 + ,165446 + ,33 + ,511 + ,57 + ,0 + ,69 + ,25 + ,237213 + ,84 + ,655 + ,74 + ,0 + ,78 + ,38 + ,173326 + ,88 + ,465 + ,49 + ,7 + ,86 + ,44 + ,133131 + ,55 + ,525 + ,52 + ,7 + ,44 + ,30 + ,258873 + ,60 + ,885 + ,88 + ,3 + ,104 + ,40 + ,180083 + ,66 + ,497 + ,36 + ,9 + ,63 + ,34 + ,324799 + ,154 + ,1436 + ,108 + ,0 + ,158 + ,47 + ,230964 + ,53 + ,612 + ,43 + ,4 + ,102 + ,30 + ,236785 + ,119 + ,865 + ,75 + ,3 + ,77 + ,31 + ,135473 + ,41 + ,385 + ,32 + ,0 + ,82 + ,23 + ,202925 + ,61 + ,567 + ,44 + ,7 + ,115 + ,36 + ,215147 + ,58 + ,639 + ,85 + ,0 + ,101 + ,36 + ,344297 + ,75 + ,963 + ,86 + ,1 + ,80 + ,30 + ,153935 + ,33 + ,398 + ,56 + ,5 + ,50 + ,25 + ,132943 + ,40 + ,410 + ,50 + ,7 + ,83 + ,39 + ,174724 + ,92 + ,966 + ,135 + ,0 + ,123 + ,34 + ,174415 + ,100 + ,801 + ,63 + ,0 + ,73 + ,31 + ,225548 + ,112 + ,892 + ,81 + ,5 + ,81 + ,31 + ,223632 + ,73 + ,513 + ,52 + ,0 + ,105 + ,33 + ,124817 + ,40 + ,469 + ,44 + ,0 + ,47 + ,25 + ,221698 + ,45 + ,683 + ,113 + ,0 + ,105 + ,33 + ,210767 + ,60 + ,643 + ,39 + ,3 + ,94 + ,35 + ,170266 + ,62 + ,535 + ,73 + ,4 + ,44 + ,42 + ,260561 + ,75 + ,625 + ,48 + ,1 + ,114 + ,43 + ,84853 + ,31 + ,264 + ,33 + ,4 + ,38 + ,30 + ,294424 + ,77 + ,992 + ,59 + ,2 + ,107 + ,33 + ,101011 + ,34 + ,238 + ,41 + ,0 + ,30 + ,13 + ,215641 + ,46 + ,818 + ,69 + ,0 + ,71 + ,32 + ,325107 + ,99 + ,937 + ,64 + ,0 + ,84 + ,36 + ,7176 + ,17 + ,70 + ,1 + ,0 + ,0 + ,0 + ,167542 + ,66 + ,507 + ,59 + ,2 + ,59 + ,28 + ,106408 + ,30 + ,260 + ,32 + ,1 + ,33 + ,14 + ,96560 + ,76 + ,503 + ,129 + ,0 + ,42 + ,17 + ,265769 + ,146 + ,927 + ,37 + ,2 + ,96 + ,32 + ,269651 + ,67 + ,1269 + ,31 + ,10 + ,106 + ,30 + ,149112 + ,56 + ,537 + ,65 + ,6 + ,56 + ,35 + ,175824 + ,107 + ,910 + ,107 + ,0 + ,57 + ,20 + ,152871 + ,58 + ,532 + ,74 + ,5 + ,59 + ,28 + ,111665 + ,34 + ,345 + ,54 + ,4 + ,39 + ,28 + ,116408 + ,61 + ,918 + ,76 + ,1 + ,34 + ,39 + ,362301 + ,119 + ,1635 + ,715 + ,2 + ,76 + ,34 + ,78800 + ,42 + ,330 + ,57 + ,2 + ,20 + ,26 + ,183167 + ,66 + ,557 + ,66 + ,0 + ,91 + ,39 + ,277965 + ,89 + ,1178 + ,106 + ,8 + ,115 + ,39 + ,150629 + ,44 + ,740 + ,54 + ,3 + ,85 + ,33 + ,168809 + ,66 + ,452 + ,32 + ,0 + ,76 + ,28 + ,24188 + ,24 + ,218 + ,20 + ,0 + ,8 + ,4) + ,dim=c(7 + ,57) + ,dimnames=list(c('time_in_rfc' + ,'logins' + ,'compendium_views_info' + ,'compendium_views_pr' + ,'shared_compendiums' + ,'blogged_computations' + ,'compendiums_reviewed') + ,1:57)) > y <- array(NA,dim=c(7,57),dimnames=list(c('time_in_rfc','logins','compendium_views_info','compendium_views_pr','shared_compendiums','blogged_computations','compendiums_reviewed'),1:57)) > for (i in 1:dim(x)[1]) + { + for (j in 1:dim(x)[2]) + { + y[i,j] <- as.numeric(x[i,j]) + } + } > par4 = 'no' > par3 = '2' > par2 = 'quantiles' > par1 = '1' > library(party) Loading required package: survival Loading required package: splines Loading required package: grid Loading required package: modeltools Loading required package: stats4 Loading required package: coin Loading required package: mvtnorm Loading required package: zoo Attaching package: 'zoo' The following object(s) are masked from 'package:base': as.Date, as.Date.numeric Loading required package: sandwich Loading required package: strucchange Loading required package: vcd Loading required package: MASS Loading required package: colorspace > library(Hmisc) Hmisc library by Frank E Harrell Jr Type library(help='Hmisc'), ?Overview, or ?Hmisc.Overview') to see overall documentation. NOTE:Hmisc no longer redefines [.factor to drop unused levels when subsetting. To get the old behavior of Hmisc type dropUnusedLevels(). Attaching package: 'Hmisc' The following object(s) are masked from 'package:survival': untangle.specials The following object(s) are masked from 'package:base': format.pval, round.POSIXt, trunc.POSIXt, units > par1 <- as.numeric(par1) > par3 <- as.numeric(par3) > x <- data.frame(t(y)) > is.data.frame(x) [1] TRUE > x <- x[!is.na(x[,par1]),] > k <- length(x[1,]) > n <- length(x[,1]) > colnames(x)[par1] [1] "time_in_rfc" > x[,par1] [1] 210907 120982 176508 179321 123185 52746 385534 33170 101645 149061 [11] 165446 237213 173326 133131 258873 180083 324799 230964 236785 135473 [21] 202925 215147 344297 153935 132943 174724 174415 225548 223632 124817 [31] 221698 210767 170266 260561 84853 294424 101011 215641 325107 7176 [41] 167542 106408 96560 265769 269651 149112 175824 152871 111665 116408 [51] 362301 78800 183167 277965 150629 168809 24188 > if (par2 == 'kmeans') { + cl <- kmeans(x[,par1], par3) + print(cl) + clm <- matrix(cbind(cl$centers,1:par3),ncol=2) + clm <- clm[sort.list(clm[,1]),] + for (i in 1:par3) { + cl$cluster[cl$cluster==clm[i,2]] <- paste('C',i,sep='') + } + cl$cluster <- as.factor(cl$cluster) + print(cl$cluster) + x[,par1] <- cl$cluster + } > if (par2 == 'quantiles') { + x[,par1] <- cut2(x[,par1],g=par3) + } > if (par2 == 'hclust') { + hc <- hclust(dist(x[,par1])^2, 'cen') + print(hc) + memb <- cutree(hc, k = par3) + dum <- c(mean(x[memb==1,par1])) + for (i in 2:par3) { + dum <- c(dum, mean(x[memb==i,par1])) + } + hcm <- matrix(cbind(dum,1:par3),ncol=2) + hcm <- hcm[sort.list(hcm[,1]),] + for (i in 1:par3) { + memb[memb==hcm[i,2]] <- paste('C',i,sep='') + } + memb <- as.factor(memb) + print(memb) + x[,par1] <- memb + } > if (par2=='equal') { + ed <- cut(as.numeric(x[,par1]),par3,labels=paste('C',1:par3,sep='')) + x[,par1] <- as.factor(ed) + } > table(x[,par1]) [ 7176,174724) [174724,385534] 29 28 > colnames(x) [1] "time_in_rfc" "logins" "compendium_views_info" [4] "compendium_views_pr" "shared_compendiums" "blogged_computations" [7] "compendiums_reviewed" > colnames(x)[par1] [1] "time_in_rfc" > x[,par1] [1] [174724,385534] [ 7176,174724) [174724,385534] [174724,385534] [5] [ 7176,174724) [ 7176,174724) [174724,385534] [ 7176,174724) [9] [ 7176,174724) [ 7176,174724) [ 7176,174724) [174724,385534] [13] [ 7176,174724) [ 7176,174724) [174724,385534] [174724,385534] [17] [174724,385534] [174724,385534] [174724,385534] [ 7176,174724) [21] [174724,385534] [174724,385534] [174724,385534] [ 7176,174724) [25] [ 7176,174724) [174724,385534] [ 7176,174724) [174724,385534] [29] [174724,385534] [ 7176,174724) [174724,385534] [174724,385534] [33] [ 7176,174724) [174724,385534] [ 7176,174724) [174724,385534] [37] [ 7176,174724) [174724,385534] [174724,385534] [ 7176,174724) [41] [ 7176,174724) [ 7176,174724) [ 7176,174724) [174724,385534] [45] [174724,385534] [ 7176,174724) [174724,385534] [ 7176,174724) [49] [ 7176,174724) [ 7176,174724) [174724,385534] [ 7176,174724) [53] [174724,385534] [174724,385534] [ 7176,174724) [ 7176,174724) [57] [ 7176,174724) Levels: [ 7176,174724) [174724,385534] > if (par2 == 'none') { + m <- ctree(as.formula(paste(colnames(x)[par1],' ~ .',sep='')),data = x) + } > > #Note: the /var/fisher/rcomp/createtable file can be downloaded at http://www.wessa.net/cretab > load(file="/var/fisher/rcomp/createtable") > > if (par2 != 'none') { + m <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data = x) + if (par4=='yes') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'10-Fold Cross Validation',3+2*par3,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + a<-table.element(a,'Prediction (training)',par3+1,TRUE) + a<-table.element(a,'Prediction (testing)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Actual',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + a<-table.row.end(a) + for (i in 1:10) { + ind <- sample(2, nrow(x), replace=T, prob=c(0.9,0.1)) + m.ct <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data =x[ind==1,]) + if (i==1) { + m.ct.i.pred <- predict(m.ct, newdata=x[ind==1,]) + m.ct.i.actu <- x[ind==1,par1] + m.ct.x.pred <- predict(m.ct, newdata=x[ind==2,]) + m.ct.x.actu <- x[ind==2,par1] + } else { + m.ct.i.pred <- c(m.ct.i.pred,predict(m.ct, newdata=x[ind==1,])) + m.ct.i.actu <- c(m.ct.i.actu,x[ind==1,par1]) + m.ct.x.pred <- c(m.ct.x.pred,predict(m.ct, newdata=x[ind==2,])) + m.ct.x.actu <- c(m.ct.x.actu,x[ind==2,par1]) + } + } + print(m.ct.i.tab <- table(m.ct.i.actu,m.ct.i.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.i.tab[i,i] / sum(m.ct.i.tab[i,])) + numer <- numer + m.ct.i.tab[i,i] + } + print(m.ct.i.cp <- numer / sum(m.ct.i.tab)) + print(m.ct.x.tab <- table(m.ct.x.actu,m.ct.x.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.x.tab[i,i] / sum(m.ct.x.tab[i,])) + numer <- numer + m.ct.x.tab[i,i] + } + print(m.ct.x.cp <- numer / sum(m.ct.x.tab)) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (jjj in 1:par3) a<-table.element(a,m.ct.i.tab[i,jjj]) + a<-table.element(a,round(m.ct.i.tab[i,i]/sum(m.ct.i.tab[i,]),4)) + for (jjj in 1:par3) a<-table.element(a,m.ct.x.tab[i,jjj]) + a<-table.element(a,round(m.ct.x.tab[i,i]/sum(m.ct.x.tab[i,]),4)) + a<-table.row.end(a) + } + a<-table.row.start(a) + a<-table.element(a,'Overall',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.i.cp,4)) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.x.cp,4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/fisher/rcomp/tmp/1lfbx1355156706.tab") + } + } > m Conditional inference tree with 3 terminal nodes Response: as.factor(time_in_rfc) Inputs: logins, compendium_views_info, compendium_views_pr, shared_compendiums, blogged_computations, compendiums_reviewed Number of observations: 57 1) blogged_computations <= 59; criterion = 1, statistic = 29.376 2) logins <= 56; criterion = 0.98, statistic = 8.57 3)* weights = 16 2) logins > 56 4)* weights = 7 1) blogged_computations > 59 5)* weights = 34 > postscript(file="/var/fisher/rcomp/tmp/25taf1355156706.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(m) > dev.off() null device 1 > postscript(file="/var/fisher/rcomp/tmp/3r30m1355156706.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(x[,par1] ~ as.factor(where(m)),main='Response by Terminal Node',xlab='Terminal Node',ylab='Response') > dev.off() null device 1 > if (par2 == 'none') { + forec <- predict(m) + result <- as.data.frame(cbind(x[,par1],forec,x[,par1]-forec)) + colnames(result) <- c('Actuals','Forecasts','Residuals') + print(result) + } > if (par2 != 'none') { + print(cbind(as.factor(x[,par1]),predict(m))) + myt <- table(as.factor(x[,par1]),predict(m)) + print(myt) + } [,1] [,2] [1,] 2 2 [2,] 1 1 [3,] 2 2 [4,] 2 2 [5,] 1 1 [6,] 1 1 [7,] 2 2 [8,] 1 1 [9,] 1 1 [10,] 1 1 [11,] 1 2 [12,] 2 2 [13,] 1 2 [14,] 1 1 [15,] 2 2 [16,] 2 2 [17,] 2 2 [18,] 2 2 [19,] 2 2 [20,] 1 2 [21,] 2 2 [22,] 2 2 [23,] 2 2 [24,] 1 1 [25,] 1 2 [26,] 2 2 [27,] 1 2 [28,] 2 2 [29,] 2 2 [30,] 1 1 [31,] 2 2 [32,] 2 2 [33,] 1 1 [34,] 2 2 [35,] 1 1 [36,] 2 2 [37,] 1 1 [38,] 2 2 [39,] 2 2 [40,] 1 1 [41,] 1 1 [42,] 1 1 [43,] 1 1 [44,] 2 2 [45,] 2 2 [46,] 1 1 [47,] 2 1 [48,] 1 1 [49,] 1 1 [50,] 1 1 [51,] 2 2 [52,] 1 1 [53,] 2 2 [54,] 2 2 [55,] 1 2 [56,] 1 2 [57,] 1 1 [ 7176,174724) [174724,385534] [ 7176,174724) 22 7 [174724,385534] 1 27 > postscript(file="/var/fisher/rcomp/tmp/4cg691355156706.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > if(par2=='none') { + op <- par(mfrow=c(2,2)) + plot(density(result$Actuals),main='Kernel Density Plot of Actuals') + plot(density(result$Residuals),main='Kernel Density Plot of Residuals') + plot(result$Forecasts,result$Actuals,main='Actuals versus Predictions',xlab='Predictions',ylab='Actuals') + plot(density(result$Forecasts),main='Kernel Density Plot of Predictions') + par(op) + } > if(par2!='none') { + plot(myt,main='Confusion Matrix',xlab='Actual',ylab='Predicted') + } > dev.off() null device 1 > if (par2 == 'none') { + detcoef <- cor(result$Forecasts,result$Actuals) + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Goodness of Fit',2,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Correlation',1,TRUE) + a<-table.element(a,round(detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'R-squared',1,TRUE) + a<-table.element(a,round(detcoef*detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'RMSE',1,TRUE) + a<-table.element(a,round(sqrt(mean((result$Residuals)^2)),4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/fisher/rcomp/tmp/5mxou1355156706.tab") + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Actuals, Predictions, and Residuals',4,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'#',header=TRUE) + a<-table.element(a,'Actuals',header=TRUE) + a<-table.element(a,'Forecasts',header=TRUE) + a<-table.element(a,'Residuals',header=TRUE) + a<-table.row.end(a) + for (i in 1:length(result$Actuals)) { + a<-table.row.start(a) + a<-table.element(a,i,header=TRUE) + a<-table.element(a,result$Actuals[i]) + a<-table.element(a,result$Forecasts[i]) + a<-table.element(a,result$Residuals[i]) + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/fisher/rcomp/tmp/6ep6c1355156706.tab") + } > if (par2 != 'none') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Confusion Matrix (predicted in columns / actuals in rows)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + for (i in 1:par3) { + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + } + a<-table.row.end(a) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (j in 1:par3) { + a<-table.element(a,myt[i,j]) + } + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/fisher/rcomp/tmp/7m2w21355156706.tab") + } > > try(system("convert tmp/25taf1355156706.ps tmp/25taf1355156706.png",intern=TRUE)) character(0) > try(system("convert tmp/3r30m1355156706.ps tmp/3r30m1355156706.png",intern=TRUE)) character(0) > try(system("convert tmp/4cg691355156706.ps tmp/4cg691355156706.png",intern=TRUE)) character(0) > > > proc.time() user system elapsed 4.277 0.588 4.841