R version 2.13.0 (2011-04-13) Copyright (C) 2011 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i486-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > x <- array(list(2050 + ,2650 + ,13 + ,7 + ,1 + ,0 + ,1639 + ,2150 + ,2664 + ,6 + ,5 + ,1 + ,0 + ,1193 + ,2150 + ,2921 + ,3 + ,6 + ,1 + ,0 + ,1635 + ,1999 + ,2580 + ,4 + ,4 + ,1 + ,0 + ,1732 + ,1900 + ,2580 + ,4 + ,4 + ,1 + ,0 + ,1534 + ,1800 + ,2774 + ,2 + ,4 + ,1 + ,0 + ,1765 + ,1560 + ,1920 + ,1 + ,5 + ,1 + ,0 + ,1161 + ,1449 + ,1710 + ,1 + ,3 + ,1 + ,0 + ,1010 + ,1375 + ,1837 + ,4 + ,5 + ,1 + ,0 + ,1191 + ,1270 + ,1880 + ,8 + ,6 + ,1 + ,0 + ,930 + ,1250 + ,2150 + ,15 + ,3 + ,1 + ,0 + ,984 + ,1235 + ,1894 + ,14 + ,5 + ,1 + ,0 + ,1112 + ,1170 + ,1928 + ,18 + ,8 + ,1 + ,0 + ,600 + ,1155 + ,1767 + ,16 + ,4 + ,1 + ,0 + ,794 + ,1110 + ,1630 + ,15 + ,3 + ,1 + ,1 + ,867 + ,1139 + ,1680 + ,17 + ,4 + ,1 + ,1 + ,750 + ,995 + ,1500 + ,15 + ,4 + ,1 + ,0 + ,743 + ,900 + ,1400 + ,16 + ,2 + ,1 + ,1 + ,731 + ,960 + ,1573 + ,17 + ,6 + ,1 + ,0 + ,768 + ,1695 + ,2931 + ,28 + ,3 + ,1 + ,1 + ,1142 + ,1553 + ,2200 + ,28 + ,4 + ,1 + ,0 + ,1035 + ,1020 + ,1478 + ,53 + ,3 + ,1 + ,1 + ,626 + ,1020 + ,1713 + ,30 + ,4 + ,1 + ,1 + ,600 + ,850 + ,1190 + ,41 + ,1 + ,1 + ,0 + ,600 + ,720 + ,1121 + ,46 + ,4 + ,1 + ,0 + ,398 + ,749 + ,1733 + ,43 + ,6 + ,1 + ,0 + ,656 + ,2150 + ,2848 + ,4 + ,6 + ,1 + ,0 + ,1487 + ,1350 + ,2253 + ,23 + ,4 + ,1 + ,0 + ,939 + ,1299 + ,2743 + ,25 + ,5 + ,1 + ,1 + ,1232 + ,1250 + ,2180 + ,17 + ,4 + ,1 + ,1 + ,1141 + ,1239 + ,1706 + ,14 + ,4 + ,1 + ,0 + ,810 + ,1125 + ,1710 + ,16 + ,4 + ,1 + ,0 + ,800 + ,1080 + ,2200 + ,26 + ,4 + ,1 + ,0 + ,1076 + ,1050 + ,1680 + ,13 + ,4 + ,1 + ,0 + ,875 + ,1049 + ,1900 + ,34 + ,3 + ,1 + ,0 + ,690 + ,934 + ,1543 + ,20 + ,3 + ,1 + ,0 + ,820 + ,875 + ,1173 + ,6 + ,4 + ,1 + ,0 + ,456 + ,805 + ,1258 + ,7 + ,4 + ,1 + ,1 + ,821 + ,759 + ,997 + ,4 + ,4 + ,1 + ,0 + ,461 + ,729 + ,1007 + ,19 + ,6 + ,1 + ,0 + ,513 + ,710 + ,1083 + ,22 + ,4 + ,1 + ,0 + ,504 + ,690 + ,1348 + ,15 + ,2 + ,1 + ,0 + ,975 + ,1500 + ,7 + ,3 + ,0 + ,1 + ,700 + ,939 + ,1428 + ,40 + ,2 + ,0 + ,0 + ,701 + ,2100 + ,2116 + ,25 + ,3 + ,0 + ,0 + ,1209 + ,580 + ,1051 + ,15 + ,2 + ,0 + ,0 + ,426 + ,1844 + ,2250 + ,40 + ,6 + ,0 + ,0 + ,915 + ,699 + ,1400 + ,45 + ,1 + ,0 + ,1 + ,481 + ,1160 + ,1720 + ,5 + ,4 + ,0 + ,0 + ,867 + ,1109 + ,1740 + ,4 + ,3 + ,0 + ,0 + ,816 + ,1129 + ,1700 + ,6 + ,4 + ,0 + ,0 + ,725 + ,1050 + ,1620 + ,6 + ,4 + ,0 + ,0 + ,800 + ,1045 + ,1630 + ,6 + ,4 + ,0 + ,0 + ,750 + ,1050 + ,1920 + ,8 + ,4 + ,0 + ,0 + ,944 + ,1020 + ,1606 + ,5 + ,4 + ,0 + ,0 + ,811 + ,1000 + ,1535 + ,7 + ,5 + ,0 + ,1 + ,668 + ,1030 + ,1540 + ,6 + ,2 + ,0 + ,1 + ,826 + ,975 + ,1739 + ,13 + ,3 + ,0 + ,0 + ,880 + ,940 + ,1305 + ,5 + ,3 + ,0 + ,0 + ,647 + ,920 + ,1415 + ,7 + ,4 + ,0 + ,0 + ,866 + ,945 + ,1580 + ,9 + ,3 + ,0 + ,0 + ,810 + ,874 + ,1236 + ,3 + ,4 + ,0 + ,0 + ,707 + ,872 + ,1229 + ,6 + ,3 + ,0 + ,0 + ,721 + ,870 + ,1273 + ,4 + ,4 + ,0 + ,0 + ,638 + ,869 + ,1165 + ,7 + ,4 + ,0 + ,0 + ,694 + ,766 + ,1200 + ,7 + ,4 + ,0 + ,1 + ,634 + ,739 + ,970 + ,4 + ,4 + ,0 + ,1 + ,541) + ,dim=c(7 + ,67) + ,dimnames=list(c('PRICE' + ,'SQFT' + ,'AGE' + ,'FEATS' + ,'NE' + ,'COR' + ,'TAX') + ,1:67)) > y <- array(NA,dim=c(7,67),dimnames=list(c('PRICE','SQFT','AGE','FEATS','NE','COR','TAX'),1:67)) > for (i in 1:dim(x)[1]) + { + for (j in 1:dim(x)[2]) + { + y[i,j] <- as.numeric(x[i,j]) + } + } > par4 = 'no' > par3 = '2' > par2 = 'none' > par1 = '7' > #'GNU S' R Code compiled by R2WASP v. 1.0.44 () > #Author: Dr. Ian E. Holliday > #To cite this work: Ian E. Holliday, 2009, YOUR SOFTWARE TITLE (vNUMBER) in Free Statistics Software (v$_version), Office for Research Development and Education, URL http://www.wessa.net/rwasp_YOURPAGE.wasp/ > #Source of accompanying publication: > #Technical description: > library(party) Loading required package: survival Loading required package: splines Loading required package: grid Loading required package: modeltools Loading required package: stats4 Loading required package: coin Loading required package: mvtnorm Loading required package: zoo Loading required package: sandwich Loading required package: strucchange Loading required package: vcd Loading required package: MASS Loading required package: colorspace > library(Hmisc) Attaching package: 'Hmisc' The following object(s) are masked from 'package:survival': untangle.specials The following object(s) are masked from 'package:base': format.pval, round.POSIXt, trunc.POSIXt, units > par1 <- as.numeric(par1) > par3 <- as.numeric(par3) > x <- data.frame(t(y)) > is.data.frame(x) [1] TRUE > x <- x[!is.na(x[,par1]),] > k <- length(x[1,]) > n <- length(x[,1]) > colnames(x)[par1] [1] "TAX" > x[,par1] [1] 1639 1193 1635 1732 1534 1765 1161 1010 1191 930 984 1112 600 794 867 [16] 750 743 731 768 1142 1035 626 600 600 398 656 1487 939 1232 1141 [31] 810 800 1076 875 690 820 456 821 461 513 504 975 939 2100 580 [46] 1844 699 1160 1109 1129 1050 1045 1050 1020 1000 1030 975 940 920 945 [61] 874 872 870 869 766 739 2050 > if (par2 == 'kmeans') { + cl <- kmeans(x[,par1], par3) + print(cl) + clm <- matrix(cbind(cl$centers,1:par3),ncol=2) + clm <- clm[sort.list(clm[,1]),] + for (i in 1:par3) { + cl$cluster[cl$cluster==clm[i,2]] <- paste('C',i,sep='') + } + cl$cluster <- as.factor(cl$cluster) + print(cl$cluster) + x[,par1] <- cl$cluster + } > if (par2 == 'quantiles') { + x[,par1] <- cut2(x[,par1],g=par3) + } > if (par2 == 'hclust') { + hc <- hclust(dist(x[,par1])^2, 'cen') + print(hc) + memb <- cutree(hc, k = par3) + dum <- c(mean(x[memb==1,par1])) + for (i in 2:par3) { + dum <- c(dum, mean(x[memb==i,par1])) + } + hcm <- matrix(cbind(dum,1:par3),ncol=2) + hcm <- hcm[sort.list(hcm[,1]),] + for (i in 1:par3) { + memb[memb==hcm[i,2]] <- paste('C',i,sep='') + } + memb <- as.factor(memb) + print(memb) + x[,par1] <- memb + } > if (par2=='equal') { + ed <- cut(as.numeric(x[,par1]),par3,labels=paste('C',1:par3,sep='')) + x[,par1] <- as.factor(ed) + } > table(x[,par1]) 398 456 461 504 513 580 600 626 656 690 699 731 739 743 750 766 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 768 794 800 810 820 821 867 869 870 872 874 875 920 930 939 940 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 945 975 984 1000 1010 1020 1030 1035 1045 1050 1076 1109 1112 1129 1141 1142 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1160 1161 1191 1193 1232 1487 1534 1635 1639 1732 1765 1844 2050 2100 1 1 1 1 1 1 1 1 1 1 1 1 1 1 > colnames(x) [1] "PRICE" "SQFT" "AGE" "FEATS" "NE" "COR" "TAX" > colnames(x)[par1] [1] "TAX" > x[,par1] [1] 1639 1193 1635 1732 1534 1765 1161 1010 1191 930 984 1112 600 794 867 [16] 750 743 731 768 1142 1035 626 600 600 398 656 1487 939 1232 1141 [31] 810 800 1076 875 690 820 456 821 461 513 504 975 939 2100 580 [46] 1844 699 1160 1109 1129 1050 1045 1050 1020 1000 1030 975 940 920 945 [61] 874 872 870 869 766 739 2050 > if (par2 == 'none') { + m <- ctree(as.formula(paste(colnames(x)[par1],' ~ .',sep='')),data = x) + } > > #Note: the /var/wessaorg/rcomp/createtable file can be downloaded at http://www.wessa.net/cretab > load(file="/var/wessaorg/rcomp/createtable") > > if (par2 != 'none') { + m <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data = x) + if (par4=='yes') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'10-Fold Cross Validation',3+2*par3,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + a<-table.element(a,'Prediction (training)',par3+1,TRUE) + a<-table.element(a,'Prediction (testing)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Actual',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + a<-table.row.end(a) + for (i in 1:10) { + ind <- sample(2, nrow(x), replace=T, prob=c(0.9,0.1)) + m.ct <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data =x[ind==1,]) + if (i==1) { + m.ct.i.pred <- predict(m.ct, newdata=x[ind==1,]) + m.ct.i.actu <- x[ind==1,par1] + m.ct.x.pred <- predict(m.ct, newdata=x[ind==2,]) + m.ct.x.actu <- x[ind==2,par1] + } else { + m.ct.i.pred <- c(m.ct.i.pred,predict(m.ct, newdata=x[ind==1,])) + m.ct.i.actu <- c(m.ct.i.actu,x[ind==1,par1]) + m.ct.x.pred <- c(m.ct.x.pred,predict(m.ct, newdata=x[ind==2,])) + m.ct.x.actu <- c(m.ct.x.actu,x[ind==2,par1]) + } + } + print(m.ct.i.tab <- table(m.ct.i.actu,m.ct.i.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.i.tab[i,i] / sum(m.ct.i.tab[i,])) + numer <- numer + m.ct.i.tab[i,i] + } + print(m.ct.i.cp <- numer / sum(m.ct.i.tab)) + print(m.ct.x.tab <- table(m.ct.x.actu,m.ct.x.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.x.tab[i,i] / sum(m.ct.x.tab[i,])) + numer <- numer + m.ct.x.tab[i,i] + } + print(m.ct.x.cp <- numer / sum(m.ct.x.tab)) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (jjj in 1:par3) a<-table.element(a,m.ct.i.tab[i,jjj]) + a<-table.element(a,round(m.ct.i.tab[i,i]/sum(m.ct.i.tab[i,]),4)) + for (jjj in 1:par3) a<-table.element(a,m.ct.x.tab[i,jjj]) + a<-table.element(a,round(m.ct.x.tab[i,i]/sum(m.ct.x.tab[i,]),4)) + a<-table.row.end(a) + } + a<-table.row.start(a) + a<-table.element(a,'Overall',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.i.cp,4)) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.x.cp,4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/17u0v1323886330.tab") + } + } > m Conditional inference tree with 3 terminal nodes Response: TAX Inputs: PRICE, SQFT, AGE, FEATS, NE, COR Number of observations: 67 1) PRICE <= 1273; criterion = 1, statistic = 15.566 2)* weights = 35 1) PRICE > 1273 3) COR <= 701; criterion = 0.982, statistic = 8.821 4)* weights = 19 3) COR > 701 5)* weights = 13 > postscript(file="/var/wessaorg/rcomp/tmp/2wx1c1323886330.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(m) > dev.off() null device 1 > postscript(file="/var/wessaorg/rcomp/tmp/3af7c1323886330.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(x[,par1] ~ as.factor(where(m)),main='Response by Terminal Node',xlab='Terminal Node',ylab='Response') > dev.off() null device 1 > if (par2 == 'none') { + forec <- predict(m) + result <- as.data.frame(cbind(x[,par1],forec,x[,par1]-forec)) + colnames(result) <- c('Actuals','Forecasts','Residuals') + print(result) + } Actuals Forecasts Residuals 1 1639 1307.5789 331.42105 2 1193 1307.5789 -114.57895 3 1635 1307.5789 327.42105 4 1732 1307.5789 424.42105 5 1534 1307.5789 226.42105 6 1765 1307.5789 457.42105 7 1161 1307.5789 -146.57895 8 1010 1307.5789 -297.57895 9 1191 1307.5789 -116.57895 10 930 831.7429 98.25714 11 984 831.7429 152.25714 12 1112 831.7429 280.25714 13 600 831.7429 -231.74286 14 794 831.7429 -37.74286 15 867 831.7429 35.25714 16 750 831.7429 -81.74286 17 743 831.7429 -88.74286 18 731 831.7429 -100.74286 19 768 831.7429 -63.74286 20 1142 1307.5789 -165.57895 21 1035 1307.5789 -272.57895 22 626 831.7429 -205.74286 23 600 831.7429 -231.74286 24 600 831.7429 -231.74286 25 398 831.7429 -433.74286 26 656 831.7429 -175.74286 27 1487 1307.5789 179.42105 28 939 1307.5789 -368.57895 29 1232 1307.5789 -75.57895 30 1141 831.7429 309.25714 31 810 831.7429 -21.74286 32 800 831.7429 -31.74286 33 1076 831.7429 244.25714 34 875 831.7429 43.25714 35 690 831.7429 -141.74286 36 820 831.7429 -11.74286 37 456 831.7429 -375.74286 38 821 831.7429 -10.74286 39 461 831.7429 -370.74286 40 513 831.7429 -318.74286 41 504 831.7429 -327.74286 42 975 831.7429 143.25714 43 939 1307.5789 -368.57895 44 2100 1307.5789 792.42105 45 580 955.0769 -375.07692 46 1844 831.7429 1012.25714 47 699 955.0769 -256.07692 48 1160 1307.5789 -147.57895 49 1109 955.0769 153.92308 50 1129 955.0769 173.92308 51 1050 955.0769 94.92308 52 1045 955.0769 89.92308 53 1050 955.0769 94.92308 54 1020 955.0769 64.92308 55 1000 955.0769 44.92308 56 1030 1307.5789 -277.57895 57 975 955.0769 19.92308 58 940 955.0769 -15.07692 59 920 1307.5789 -387.57895 60 945 955.0769 -10.07692 61 874 955.0769 -81.07692 62 872 831.7429 40.25714 63 870 831.7429 38.25714 64 869 831.7429 37.25714 65 766 831.7429 -65.74286 66 739 831.7429 -92.74286 67 2050 831.7429 1218.25714 > if (par2 != 'none') { + print(cbind(as.factor(x[,par1]),predict(m))) + myt <- table(as.factor(x[,par1]),predict(m)) + print(myt) + } > postscript(file="/var/wessaorg/rcomp/tmp/422p21323886330.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > if(par2=='none') { + op <- par(mfrow=c(2,2)) + plot(density(result$Actuals),main='Kernel Density Plot of Actuals') + plot(density(result$Residuals),main='Kernel Density Plot of Residuals') + plot(result$Forecasts,result$Actuals,main='Actuals versus Predictions',xlab='Predictions',ylab='Actuals') + plot(density(result$Forecasts),main='Kernel Density Plot of Predictions') + par(op) + } > if(par2!='none') { + plot(myt,main='Confusion Matrix',xlab='Actual',ylab='Predicted') + } > dev.off() null device 1 > if (par2 == 'none') { + detcoef <- cor(result$Forecasts,result$Actuals) + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Goodness of Fit',2,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Correlation',1,TRUE) + a<-table.element(a,round(detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'R-squared',1,TRUE) + a<-table.element(a,round(detcoef*detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'RMSE',1,TRUE) + a<-table.element(a,round(sqrt(mean((result$Residuals)^2)),4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/5g4211323886331.tab") + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Actuals, Predictions, and Residuals',4,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'#',header=TRUE) + a<-table.element(a,'Actuals',header=TRUE) + a<-table.element(a,'Forecasts',header=TRUE) + a<-table.element(a,'Residuals',header=TRUE) + a<-table.row.end(a) + for (i in 1:length(result$Actuals)) { + a<-table.row.start(a) + a<-table.element(a,i,header=TRUE) + a<-table.element(a,result$Actuals[i]) + a<-table.element(a,result$Forecasts[i]) + a<-table.element(a,result$Residuals[i]) + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/6gwy81323886331.tab") + } > if (par2 != 'none') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Confusion Matrix (predicted in columns / actuals in rows)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + for (i in 1:par3) { + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + } + a<-table.row.end(a) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (j in 1:par3) { + a<-table.element(a,myt[i,j]) + } + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/7lfju1323886331.tab") + } > > try(system("convert tmp/2wx1c1323886330.ps tmp/2wx1c1323886330.png",intern=TRUE)) character(0) > try(system("convert tmp/3af7c1323886330.ps tmp/3af7c1323886330.png",intern=TRUE)) character(0) > try(system("convert tmp/422p21323886330.ps tmp/422p21323886330.png",intern=TRUE)) character(0) > > > proc.time() user system elapsed 2.996 0.284 3.281