R version 2.12.0 (2010-10-15) Copyright (C) 2010 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i486-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > x <- array(list(1536 + ,78 + ,20 + ,17 + ,66 + ,30 + ,1134 + ,46 + ,38 + ,17 + ,68 + ,42 + ,192 + ,18 + ,0 + ,0 + ,0 + ,0 + ,2032 + ,84 + ,49 + ,22 + ,68 + ,54 + ,3230 + ,124 + ,74 + ,30 + ,120 + ,86 + ,5723 + ,214 + ,104 + ,29 + ,112 + ,157 + ,1321 + ,49 + ,37 + ,19 + ,72 + ,36 + ,1077 + ,46 + ,49 + ,25 + ,96 + ,48 + ,1462 + ,37 + ,42 + ,30 + ,109 + ,45 + ,2568 + ,86 + ,62 + ,26 + ,104 + ,77 + ,1810 + ,69 + ,50 + ,20 + ,54 + ,49 + ,1788 + ,58 + ,65 + ,25 + ,98 + ,77 + ,1334 + ,85 + ,28 + ,15 + ,49 + ,28 + ,2415 + ,84 + ,48 + ,22 + ,88 + ,84 + ,1155 + ,43 + ,42 + ,12 + ,45 + ,31 + ,1374 + ,67 + ,47 + ,19 + ,74 + ,28 + ,1503 + ,49 + ,71 + ,28 + ,112 + ,99 + ,999 + ,47 + ,0 + ,12 + ,45 + ,2 + ,2189 + ,76 + ,50 + ,28 + ,110 + ,41 + ,633 + ,20 + ,12 + ,13 + ,39 + ,25 + ,837 + ,48 + ,16 + ,14 + ,55 + ,16 + ,2167 + ,81 + ,76 + ,27 + ,102 + ,96 + ,1451 + ,57 + ,29 + ,25 + ,96 + ,23 + ,1790 + ,45 + ,38 + ,30 + ,86 + ,33 + ,1645 + ,72 + ,50 + ,18 + ,67 + ,46 + ,1179 + ,22 + ,33 + ,17 + ,64 + ,59 + ,1688 + ,138 + ,45 + ,22 + ,82 + ,72 + ,1100 + ,74 + ,59 + ,28 + ,100 + ,72 + ,2258 + ,101 + ,49 + ,25 + ,95 + ,62 + ,1767 + ,35 + ,40 + ,16 + ,63 + ,55 + ,1300 + ,39 + ,40 + ,23 + ,87 + ,27 + ,1432 + ,38 + ,51 + ,20 + ,65 + ,41 + ,1780 + ,87 + ,41 + ,11 + ,43 + ,51 + ,2475 + ,102 + ,73 + ,20 + ,80 + ,26 + ,1930 + ,42 + ,43 + ,21 + ,84 + ,65 + ,1 + ,1 + ,0 + ,0 + ,0 + ,0 + ,1782 + ,54 + ,46 + ,27 + ,105 + ,28 + ,1505 + ,46 + ,44 + ,14 + ,51 + ,44 + ,1820 + ,41 + ,31 + ,29 + ,98 + ,36 + ,1648 + ,49 + ,71 + ,31 + ,124 + ,100 + ,1668 + ,56 + ,61 + ,19 + ,75 + ,104 + ,1366 + ,47 + ,28 + ,30 + ,120 + ,35 + ,864 + ,25 + ,21 + ,23 + ,84 + ,69 + ,1602 + ,62 + ,42 + ,20 + ,78 + ,73 + ,1023 + ,41 + ,44 + ,22 + ,87 + ,106 + ,962 + ,72 + ,34 + ,19 + ,70 + ,53 + ,629 + ,26 + ,15 + ,32 + ,97 + ,43 + ,1568 + ,77 + ,46 + ,18 + ,72 + ,49 + ,1715 + ,75 + ,43 + ,26 + ,104 + ,38 + ,2093 + ,51 + ,47 + ,25 + ,93 + ,51 + ,658 + ,28 + ,12 + ,22 + ,82 + ,14 + ,1198 + ,53 + ,42 + ,19 + ,73 + ,40 + ,2059 + ,64 + ,56 + ,24 + ,87 + ,79 + ,1574 + ,65 + ,41 + ,26 + ,95 + ,52 + ,1447 + ,48 + ,48 + ,27 + ,105 + ,44 + ,1342 + ,44 + ,30 + ,10 + ,37 + ,34 + ,1526 + ,54 + ,44 + ,26 + ,96 + ,47 + ,669 + ,16 + ,25 + ,21 + ,80 + ,32 + ,859 + ,55 + ,42 + ,21 + ,83 + ,31 + ,2315 + ,71 + ,28 + ,34 + ,124 + ,40 + ,1326 + ,47 + ,33 + ,29 + ,116 + ,42 + ,1567 + ,62 + ,32 + ,18 + ,72 + ,34 + ,1080 + ,44 + ,28 + ,16 + ,55 + ,40 + ,896 + ,28 + ,31 + ,23 + ,86 + ,35 + ,855 + ,25 + ,13 + ,22 + ,85 + ,11 + ,1229 + ,37 + ,38 + ,29 + ,107 + ,43 + ,1939 + ,60 + ,39 + ,31 + ,124 + ,53 + ,2293 + ,57 + ,68 + ,21 + ,78 + ,82 + ,818 + ,30 + ,32 + ,21 + ,83 + ,41) + ,dim=c(6 + ,69) + ,dimnames=list(c('Pageviews' + ,'#Logins' + ,'Blogged_Computations' + ,'Reviewed_Compendiums' + ,'Feedback_in_PR' + ,'Included_Hyperlinks') + ,1:69)) > y <- array(NA,dim=c(6,69),dimnames=list(c('Pageviews','#Logins','Blogged_Computations','Reviewed_Compendiums','Feedback_in_PR','Included_Hyperlinks'),1:69)) > for (i in 1:dim(x)[1]) + { + for (j in 1:dim(x)[2]) + { + y[i,j] <- as.numeric(x[i,j]) + } + } > par4 = 'no' > par3 = '3' > par2 = 'none' > par1 = '1' > #'GNU S' R Code compiled by R2WASP v. 1.0.44 () > #Author: Dr. Ian E. Holliday > #To cite this work: Ian E. Holliday, 2009, YOUR SOFTWARE TITLE (vNUMBER) in Free Statistics Software (v$_version), Office for Research Development and Education, URL http://www.wessa.net/rwasp_YOURPAGE.wasp/ > #Source of accompanying publication: > #Technical description: > library(party) Loading required package: survival Loading required package: splines Loading required package: grid Loading required package: modeltools Loading required package: stats4 Loading required package: coin Loading required package: mvtnorm Loading required package: zoo Loading required package: sandwich Loading required package: strucchange Loading required package: vcd Loading required package: MASS Loading required package: colorspace > library(Hmisc) Attaching package: 'Hmisc' The following object(s) are masked from 'package:survival': untangle.specials The following object(s) are masked from 'package:base': format.pval, round.POSIXt, trunc.POSIXt, units > par1 <- as.numeric(par1) > par3 <- as.numeric(par3) > x <- data.frame(t(y)) > is.data.frame(x) [1] TRUE > x <- x[!is.na(x[,par1]),] > k <- length(x[1,]) > n <- length(x[,1]) > colnames(x)[par1] [1] "Pageviews" > x[,par1] [1] 1536 1134 192 2032 3230 5723 1321 1077 1462 2568 1810 1788 1334 2415 1155 [16] 1374 1503 999 2189 633 837 2167 1451 1790 1645 1179 1688 1100 2258 1767 [31] 1300 1432 1780 2475 1930 1 1782 1505 1820 1648 1668 1366 864 1602 1023 [46] 962 629 1568 1715 2093 658 1198 2059 1574 1447 1342 1526 669 859 2315 [61] 1326 1567 1080 896 855 1229 1939 2293 818 > if (par2 == 'kmeans') { + cl <- kmeans(x[,par1], par3) + print(cl) + clm <- matrix(cbind(cl$centers,1:par3),ncol=2) + clm <- clm[sort.list(clm[,1]),] + for (i in 1:par3) { + cl$cluster[cl$cluster==clm[i,2]] <- paste('C',i,sep='') + } + cl$cluster <- as.factor(cl$cluster) + print(cl$cluster) + x[,par1] <- cl$cluster + } > if (par2 == 'quantiles') { + x[,par1] <- cut2(x[,par1],g=par3) + } > if (par2 == 'hclust') { + hc <- hclust(dist(x[,par1])^2, 'cen') + print(hc) + memb <- cutree(hc, k = par3) + dum <- c(mean(x[memb==1,par1])) + for (i in 2:par3) { + dum <- c(dum, mean(x[memb==i,par1])) + } + hcm <- matrix(cbind(dum,1:par3),ncol=2) + hcm <- hcm[sort.list(hcm[,1]),] + for (i in 1:par3) { + memb[memb==hcm[i,2]] <- paste('C',i,sep='') + } + memb <- as.factor(memb) + print(memb) + x[,par1] <- memb + } > if (par2=='equal') { + ed <- cut(as.numeric(x[,par1]),par3,labels=paste('C',1:par3,sep='')) + x[,par1] <- as.factor(ed) + } > table(x[,par1]) 1 192 629 633 658 669 818 837 855 859 864 896 962 999 1023 1077 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1080 1100 1134 1155 1179 1198 1229 1300 1321 1326 1334 1342 1366 1374 1432 1447 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1451 1462 1503 1505 1526 1536 1567 1568 1574 1602 1645 1648 1668 1688 1715 1767 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1780 1782 1788 1790 1810 1820 1930 1939 2032 2059 2093 2167 2189 2258 2293 2315 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2415 2475 2568 3230 5723 1 1 1 1 1 > colnames(x) [1] "Pageviews" "X.Logins" "Blogged_Computations" [4] "Reviewed_Compendiums" "Feedback_in_PR" "Included_Hyperlinks" > colnames(x)[par1] [1] "Pageviews" > x[,par1] [1] 1536 1134 192 2032 3230 5723 1321 1077 1462 2568 1810 1788 1334 2415 1155 [16] 1374 1503 999 2189 633 837 2167 1451 1790 1645 1179 1688 1100 2258 1767 [31] 1300 1432 1780 2475 1930 1 1782 1505 1820 1648 1668 1366 864 1602 1023 [46] 962 629 1568 1715 2093 658 1198 2059 1574 1447 1342 1526 669 859 2315 [61] 1326 1567 1080 896 855 1229 1939 2293 818 > if (par2 == 'none') { + m <- ctree(as.formula(paste(colnames(x)[par1],' ~ .',sep='')),data = x) + } > > #Note: the /var/www/rcomp/createtable file can be downloaded at http://www.wessa.net/cretab > load(file="/var/www/rcomp/createtable") > > if (par2 != 'none') { + m <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data = x) + if (par4=='yes') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'10-Fold Cross Validation',3+2*par3,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + a<-table.element(a,'Prediction (training)',par3+1,TRUE) + a<-table.element(a,'Prediction (testing)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Actual',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + a<-table.row.end(a) + for (i in 1:10) { + ind <- sample(2, nrow(x), replace=T, prob=c(0.9,0.1)) + m.ct <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data =x[ind==1,]) + if (i==1) { + m.ct.i.pred <- predict(m.ct, newdata=x[ind==1,]) + m.ct.i.actu <- x[ind==1,par1] + m.ct.x.pred <- predict(m.ct, newdata=x[ind==2,]) + m.ct.x.actu <- x[ind==2,par1] + } else { + m.ct.i.pred <- c(m.ct.i.pred,predict(m.ct, newdata=x[ind==1,])) + m.ct.i.actu <- c(m.ct.i.actu,x[ind==1,par1]) + m.ct.x.pred <- c(m.ct.x.pred,predict(m.ct, newdata=x[ind==2,])) + m.ct.x.actu <- c(m.ct.x.actu,x[ind==2,par1]) + } + } + print(m.ct.i.tab <- table(m.ct.i.actu,m.ct.i.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.i.tab[i,i] / sum(m.ct.i.tab[i,])) + numer <- numer + m.ct.i.tab[i,i] + } + print(m.ct.i.cp <- numer / sum(m.ct.i.tab)) + print(m.ct.x.tab <- table(m.ct.x.actu,m.ct.x.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.x.tab[i,i] / sum(m.ct.x.tab[i,])) + numer <- numer + m.ct.x.tab[i,i] + } + print(m.ct.x.cp <- numer / sum(m.ct.x.tab)) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (jjj in 1:par3) a<-table.element(a,m.ct.i.tab[i,jjj]) + a<-table.element(a,round(m.ct.i.tab[i,i]/sum(m.ct.i.tab[i,]),4)) + for (jjj in 1:par3) a<-table.element(a,m.ct.x.tab[i,jjj]) + a<-table.element(a,round(m.ct.x.tab[i,i]/sum(m.ct.x.tab[i,]),4)) + a<-table.row.end(a) + } + a<-table.row.start(a) + a<-table.element(a,'Overall',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.i.cp,4)) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.x.cp,4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/www/rcomp/tmp/1cw5o1323868151.tab") + } + } > m Conditional inference tree with 5 terminal nodes Response: Pageviews Inputs: X.Logins, Blogged_Computations, Reviewed_Compendiums, Feedback_in_PR, Included_Hyperlinks Number of observations: 69 1) X.Logins <= 85; criterion = 1, statistic = 47.741 2) X.Logins <= 30; criterion = 1, statistic = 29.391 3)* weights = 11 2) X.Logins > 30 4) Blogged_Computations <= 44; criterion = 0.985, statistic = 8.823 5) Reviewed_Compendiums <= 25; criterion = 0.98, statistic = 8.308 6)* weights = 20 5) Reviewed_Compendiums > 25 7)* weights = 11 4) Blogged_Computations > 44 8)* weights = 20 1) X.Logins > 85 9)* weights = 7 > postscript(file="/var/www/rcomp/tmp/26dyv1323868151.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(m) > dev.off() null device 1 > postscript(file="/var/www/rcomp/tmp/3qx2t1323868151.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(x[,par1] ~ as.factor(where(m)),main='Response by Terminal Node',xlab='Terminal Node',ylab='Response') > dev.off() null device 1 > if (par2 == 'none') { + forec <- predict(m) + result <- as.data.frame(cbind(x[,par1],forec,x[,par1]-forec)) + colnames(result) <- c('Actuals','Forecasts','Residuals') + print(result) + } Actuals Forecasts Residuals 1 1536 1295.1000 240.900000 2 1134 1295.1000 -161.100000 3 192 672.1818 -480.181818 4 2032 1754.5000 277.500000 5 3230 2817.4286 412.571429 6 5723 2817.4286 2905.571429 7 1321 1295.1000 25.900000 8 1077 1754.5000 -677.500000 9 1462 1642.0000 -180.000000 10 2568 2817.4286 -249.428571 11 1810 1754.5000 55.500000 12 1788 1754.5000 33.500000 13 1334 1295.1000 38.900000 14 2415 1754.5000 660.500000 15 1155 1295.1000 -140.100000 16 1374 1754.5000 -380.500000 17 1503 1754.5000 -251.500000 18 999 1295.1000 -296.100000 19 2189 1754.5000 434.500000 20 633 672.1818 -39.181818 21 837 1295.1000 -458.100000 22 2167 1754.5000 412.500000 23 1451 1295.1000 155.900000 24 1790 1642.0000 148.000000 25 1645 1754.5000 -109.500000 26 1179 672.1818 506.818182 27 1688 2817.4286 -1129.428571 28 1100 1754.5000 -654.500000 29 2258 2817.4286 -559.428571 30 1767 1295.1000 471.900000 31 1300 1295.1000 4.900000 32 1432 1754.5000 -322.500000 33 1780 2817.4286 -1037.428571 34 2475 2817.4286 -342.428571 35 1930 1295.1000 634.900000 36 1 672.1818 -671.181818 37 1782 1754.5000 27.500000 38 1505 1295.1000 209.900000 39 1820 1642.0000 178.000000 40 1648 1754.5000 -106.500000 41 1668 1754.5000 -86.500000 42 1366 1642.0000 -276.000000 43 864 672.1818 191.818182 44 1602 1295.1000 306.900000 45 1023 1295.1000 -272.100000 46 962 1295.1000 -333.100000 47 629 672.1818 -43.181818 48 1568 1754.5000 -186.500000 49 1715 1642.0000 73.000000 50 2093 1754.5000 338.500000 51 658 672.1818 -14.181818 52 1198 1295.1000 -97.100000 53 2059 1754.5000 304.500000 54 1574 1642.0000 -68.000000 55 1447 1754.5000 -307.500000 56 1342 1295.1000 46.900000 57 1526 1642.0000 -116.000000 58 669 672.1818 -3.181818 59 859 1295.1000 -436.100000 60 2315 1642.0000 673.000000 61 1326 1642.0000 -316.000000 62 1567 1295.1000 271.900000 63 1080 1295.1000 -215.100000 64 896 672.1818 223.818182 65 855 672.1818 182.818182 66 1229 1642.0000 -413.000000 67 1939 1642.0000 297.000000 68 2293 1754.5000 538.500000 69 818 672.1818 145.818182 > if (par2 != 'none') { + print(cbind(as.factor(x[,par1]),predict(m))) + myt <- table(as.factor(x[,par1]),predict(m)) + print(myt) + } > postscript(file="/var/www/rcomp/tmp/41uf41323868151.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > if(par2=='none') { + op <- par(mfrow=c(2,2)) + plot(density(result$Actuals),main='Kernel Density Plot of Actuals') + plot(density(result$Residuals),main='Kernel Density Plot of Residuals') + plot(result$Forecasts,result$Actuals,main='Actuals versus Predictions',xlab='Predictions',ylab='Actuals') + plot(density(result$Forecasts),main='Kernel Density Plot of Predictions') + par(op) + } > if(par2!='none') { + plot(myt,main='Confusion Matrix',xlab='Actual',ylab='Predicted') + } > dev.off() null device 1 > if (par2 == 'none') { + detcoef <- cor(result$Forecasts,result$Actuals) + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Goodness of Fit',2,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Correlation',1,TRUE) + a<-table.element(a,round(detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'R-squared',1,TRUE) + a<-table.element(a,round(detcoef*detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'RMSE',1,TRUE) + a<-table.element(a,round(sqrt(mean((result$Residuals)^2)),4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/www/rcomp/tmp/5ww1x1323868151.tab") + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Actuals, Predictions, and Residuals',4,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'#',header=TRUE) + a<-table.element(a,'Actuals',header=TRUE) + a<-table.element(a,'Forecasts',header=TRUE) + a<-table.element(a,'Residuals',header=TRUE) + a<-table.row.end(a) + for (i in 1:length(result$Actuals)) { + a<-table.row.start(a) + a<-table.element(a,i,header=TRUE) + a<-table.element(a,result$Actuals[i]) + a<-table.element(a,result$Forecasts[i]) + a<-table.element(a,result$Residuals[i]) + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/www/rcomp/tmp/6vest1323868151.tab") + } > if (par2 != 'none') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Confusion Matrix (predicted in columns / actuals in rows)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + for (i in 1:par3) { + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + } + a<-table.row.end(a) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (j in 1:par3) { + a<-table.element(a,myt[i,j]) + } + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/www/rcomp/tmp/7zln81323868151.tab") + } > > try(system("convert tmp/26dyv1323868151.ps tmp/26dyv1323868151.png",intern=TRUE)) character(0) > try(system("convert tmp/3qx2t1323868151.ps tmp/3qx2t1323868151.png",intern=TRUE)) character(0) > try(system("convert tmp/41uf41323868151.ps tmp/41uf41323868151.png",intern=TRUE)) character(0) > > > proc.time() user system elapsed 2.790 0.170 2.963