R version 2.13.0 (2011-04-13) Copyright (C) 2011 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i486-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > x <- array(list(2050 + ,2650 + ,13 + ,7 + ,1 + ,0 + ,1639 + ,2150 + ,2664 + ,6 + ,5 + ,1 + ,0 + ,1193 + ,2150 + ,2921 + ,3 + ,6 + ,1 + ,0 + ,1635 + ,1999 + ,2580 + ,4 + ,4 + ,1 + ,0 + ,1732 + ,1900 + ,2580 + ,4 + ,4 + ,1 + ,0 + ,1534 + ,1800 + ,2774 + ,2 + ,4 + ,1 + ,0 + ,1765 + ,1560 + ,1920 + ,1 + ,5 + ,1 + ,0 + ,1161 + ,1449 + ,1710 + ,1 + ,3 + ,1 + ,0 + ,1010 + ,1375 + ,1837 + ,4 + ,5 + ,1 + ,0 + ,1191 + ,1270 + ,1880 + ,8 + ,6 + ,1 + ,0 + ,930 + ,1250 + ,2150 + ,15 + ,3 + ,1 + ,0 + ,984 + ,1235 + ,1894 + ,14 + ,5 + ,1 + ,0 + ,1112 + ,1170 + ,1928 + ,18 + ,8 + ,1 + ,0 + ,600 + ,1155 + ,1767 + ,16 + ,4 + ,1 + ,0 + ,794 + ,1110 + ,1630 + ,15 + ,3 + ,1 + ,1 + ,867 + ,1139 + ,1680 + ,17 + ,4 + ,1 + ,1 + ,750 + ,995 + ,1500 + ,15 + ,4 + ,1 + ,0 + ,743 + ,900 + ,1400 + ,16 + ,2 + ,1 + ,1 + ,731 + ,960 + ,1573 + ,17 + ,6 + ,1 + ,0 + ,768 + ,1695 + ,2931 + ,28 + ,3 + ,1 + ,1 + ,1142 + ,1553 + ,2200 + ,28 + ,4 + ,1 + ,0 + ,1035 + ,1020 + ,1478 + ,53 + ,3 + ,1 + ,1 + ,626 + ,1020 + ,1713 + ,30 + ,4 + ,1 + ,1 + ,600 + ,850 + ,1190 + ,41 + ,1 + ,1 + ,0 + ,600 + ,720 + ,1121 + ,46 + ,4 + ,1 + ,0 + ,398 + ,749 + ,1733 + ,43 + ,6 + ,1 + ,0 + ,656 + ,2150 + ,2848 + ,4 + ,6 + ,1 + ,0 + ,1487 + ,1350 + ,2253 + ,23 + ,4 + ,1 + ,0 + ,939 + ,1299 + ,2743 + ,25 + ,5 + ,1 + ,1 + ,1232 + ,1250 + ,2180 + ,17 + ,4 + ,1 + ,1 + ,1141 + ,1239 + ,1706 + ,14 + ,4 + ,1 + ,0 + ,810 + ,1125 + ,1710 + ,16 + ,4 + ,1 + ,0 + ,800 + ,1080 + ,2200 + ,26 + ,4 + ,1 + ,0 + ,1076 + ,1050 + ,1680 + ,13 + ,4 + ,1 + ,0 + ,875 + ,1049 + ,1900 + ,34 + ,3 + ,1 + ,0 + ,690 + ,934 + ,1543 + ,20 + ,3 + ,1 + ,0 + ,820 + ,875 + ,1173 + ,6 + ,4 + ,1 + ,0 + ,456 + ,805 + ,1258 + ,7 + ,4 + ,1 + ,1 + ,821 + ,759 + ,997 + ,4 + ,4 + ,1 + ,0 + ,461 + ,729 + ,1007 + ,19 + ,6 + ,1 + ,0 + ,513 + ,710 + ,1083 + ,22 + ,4 + ,1 + ,0 + ,504 + ,690 + ,1348 + ,15 + ,2 + ,1 + ,0 + ,975 + ,1500 + ,7 + ,3 + ,0 + ,1 + ,700 + ,939 + ,1428 + ,40 + ,2 + ,0 + ,0 + ,701 + ,2100 + ,2116 + ,25 + ,3 + ,0 + ,0 + ,1209 + ,580 + ,1051 + ,15 + ,2 + ,0 + ,0 + ,426 + ,1844 + ,2250 + ,40 + ,6 + ,0 + ,0 + ,915 + ,699 + ,1400 + ,45 + ,1 + ,0 + ,1 + ,481 + ,1160 + ,1720 + ,5 + ,4 + ,0 + ,0 + ,867 + ,1109 + ,1740 + ,4 + ,3 + ,0 + ,0 + ,816 + ,1129 + ,1700 + ,6 + ,4 + ,0 + ,0 + ,725 + ,1050 + ,1620 + ,6 + ,4 + ,0 + ,0 + ,800 + ,1045 + ,1630 + ,6 + ,4 + ,0 + ,0 + ,750 + ,1050 + ,1920 + ,8 + ,4 + ,0 + ,0 + ,944 + ,1020 + ,1606 + ,5 + ,4 + ,0 + ,0 + ,811 + ,1000 + ,1535 + ,7 + ,5 + ,0 + ,1 + ,668 + ,1030 + ,1540 + ,6 + ,2 + ,0 + ,1 + ,826 + ,975 + ,1739 + ,13 + ,3 + ,0 + ,0 + ,880 + ,940 + ,1305 + ,5 + ,3 + ,0 + ,0 + ,647 + ,920 + ,1415 + ,7 + ,4 + ,0 + ,0 + ,866 + ,945 + ,1580 + ,9 + ,3 + ,0 + ,0 + ,810 + ,874 + ,1236 + ,3 + ,4 + ,0 + ,0 + ,707 + ,872 + ,1229 + ,6 + ,3 + ,0 + ,0 + ,721 + ,870 + ,1273 + ,4 + ,4 + ,0 + ,0 + ,638 + ,869 + ,1165 + ,7 + ,4 + ,0 + ,0 + ,694 + ,766 + ,1200 + ,7 + ,4 + ,0 + ,1 + ,634 + ,739 + ,970 + ,4 + ,4 + ,0 + ,1 + ,541) + ,dim=c(7 + ,67) + ,dimnames=list(c('PRICE' + ,'SQFT' + ,'AGE' + ,'FEATS' + ,'NE' + ,'COR' + ,'TAX') + ,1:67)) > y <- array(NA,dim=c(7,67),dimnames=list(c('PRICE','SQFT','AGE','FEATS','NE','COR','TAX'),1:67)) > for (i in 1:dim(x)[1]) + { + for (j in 1:dim(x)[2]) + { + y[i,j] <- as.numeric(x[i,j]) + } + } > par4 = 'no' > par3 = '3' > par2 = 'none' > par1 = '1' > #'GNU S' R Code compiled by R2WASP v. 1.0.44 () > #Author: Dr. Ian E. Holliday > #To cite this work: Ian E. Holliday, 2009, YOUR SOFTWARE TITLE (vNUMBER) in Free Statistics Software (v$_version), Office for Research Development and Education, URL http://www.wessa.net/rwasp_YOURPAGE.wasp/ > #Source of accompanying publication: > #Technical description: > library(party) Loading required package: survival Loading required package: splines Loading required package: grid Loading required package: modeltools Loading required package: stats4 Loading required package: coin Loading required package: mvtnorm Loading required package: zoo Loading required package: sandwich Loading required package: strucchange Loading required package: vcd Loading required package: MASS Loading required package: colorspace > library(Hmisc) Attaching package: 'Hmisc' The following object(s) are masked from 'package:survival': untangle.specials The following object(s) are masked from 'package:base': format.pval, round.POSIXt, trunc.POSIXt, units > par1 <- as.numeric(par1) > par3 <- as.numeric(par3) > x <- data.frame(t(y)) > is.data.frame(x) [1] TRUE > x <- x[!is.na(x[,par1]),] > k <- length(x[1,]) > n <- length(x[,1]) > colnames(x)[par1] [1] "PRICE" > x[,par1] [1] 2050 2150 2150 1999 1900 1800 1560 1449 1375 1270 1250 1235 1170 1155 1110 [16] 1139 995 900 960 1695 1553 1020 1020 850 720 749 2150 1350 1299 1250 [31] 1239 1125 1080 1050 1049 934 875 805 759 729 710 690 1500 1428 2116 [46] 1051 2250 1400 1720 1740 1700 1620 1630 1920 1606 1535 1540 1739 1305 1415 [61] 1580 1236 1229 1273 1165 1200 970 > if (par2 == 'kmeans') { + cl <- kmeans(x[,par1], par3) + print(cl) + clm <- matrix(cbind(cl$centers,1:par3),ncol=2) + clm <- clm[sort.list(clm[,1]),] + for (i in 1:par3) { + cl$cluster[cl$cluster==clm[i,2]] <- paste('C',i,sep='') + } + cl$cluster <- as.factor(cl$cluster) + print(cl$cluster) + x[,par1] <- cl$cluster + } > if (par2 == 'quantiles') { + x[,par1] <- cut2(x[,par1],g=par3) + } > if (par2 == 'hclust') { + hc <- hclust(dist(x[,par1])^2, 'cen') + print(hc) + memb <- cutree(hc, k = par3) + dum <- c(mean(x[memb==1,par1])) + for (i in 2:par3) { + dum <- c(dum, mean(x[memb==i,par1])) + } + hcm <- matrix(cbind(dum,1:par3),ncol=2) + hcm <- hcm[sort.list(hcm[,1]),] + for (i in 1:par3) { + memb[memb==hcm[i,2]] <- paste('C',i,sep='') + } + memb <- as.factor(memb) + print(memb) + x[,par1] <- memb + } > if (par2=='equal') { + ed <- cut(as.numeric(x[,par1]),par3,labels=paste('C',1:par3,sep='')) + x[,par1] <- as.factor(ed) + } > table(x[,par1]) 690 710 720 729 749 759 805 850 875 900 934 960 970 995 1020 1049 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1050 1051 1080 1110 1125 1139 1155 1165 1170 1200 1229 1235 1236 1239 1250 1270 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1273 1299 1305 1350 1375 1400 1415 1428 1449 1500 1535 1540 1553 1560 1580 1606 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1620 1630 1695 1700 1720 1739 1740 1800 1900 1920 1999 2050 2116 2150 2250 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 > colnames(x) [1] "PRICE" "SQFT" "AGE" "FEATS" "NE" "COR" "TAX" > colnames(x)[par1] [1] "PRICE" > x[,par1] [1] 2050 2150 2150 1999 1900 1800 1560 1449 1375 1270 1250 1235 1170 1155 1110 [16] 1139 995 900 960 1695 1553 1020 1020 850 720 749 2150 1350 1299 1250 [31] 1239 1125 1080 1050 1049 934 875 805 759 729 710 690 1500 1428 2116 [46] 1051 2250 1400 1720 1740 1700 1620 1630 1920 1606 1535 1540 1739 1305 1415 [61] 1580 1236 1229 1273 1165 1200 970 > if (par2 == 'none') { + m <- ctree(as.formula(paste(colnames(x)[par1],' ~ .',sep='')),data = x) + } > > #Note: the /var/wessaorg/rcomp/createtable file can be downloaded at http://www.wessa.net/cretab > load(file="/var/wessaorg/rcomp/createtable") > > if (par2 != 'none') { + m <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data = x) + if (par4=='yes') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'10-Fold Cross Validation',3+2*par3,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + a<-table.element(a,'Prediction (training)',par3+1,TRUE) + a<-table.element(a,'Prediction (testing)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Actual',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + a<-table.row.end(a) + for (i in 1:10) { + ind <- sample(2, nrow(x), replace=T, prob=c(0.9,0.1)) + m.ct <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data =x[ind==1,]) + if (i==1) { + m.ct.i.pred <- predict(m.ct, newdata=x[ind==1,]) + m.ct.i.actu <- x[ind==1,par1] + m.ct.x.pred <- predict(m.ct, newdata=x[ind==2,]) + m.ct.x.actu <- x[ind==2,par1] + } else { + m.ct.i.pred <- c(m.ct.i.pred,predict(m.ct, newdata=x[ind==1,])) + m.ct.i.actu <- c(m.ct.i.actu,x[ind==1,par1]) + m.ct.x.pred <- c(m.ct.x.pred,predict(m.ct, newdata=x[ind==2,])) + m.ct.x.actu <- c(m.ct.x.actu,x[ind==2,par1]) + } + } + print(m.ct.i.tab <- table(m.ct.i.actu,m.ct.i.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.i.tab[i,i] / sum(m.ct.i.tab[i,])) + numer <- numer + m.ct.i.tab[i,i] + } + print(m.ct.i.cp <- numer / sum(m.ct.i.tab)) + print(m.ct.x.tab <- table(m.ct.x.actu,m.ct.x.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.x.tab[i,i] / sum(m.ct.x.tab[i,])) + numer <- numer + m.ct.x.tab[i,i] + } + print(m.ct.x.cp <- numer / sum(m.ct.x.tab)) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (jjj in 1:par3) a<-table.element(a,m.ct.i.tab[i,jjj]) + a<-table.element(a,round(m.ct.i.tab[i,i]/sum(m.ct.i.tab[i,]),4)) + for (jjj in 1:par3) a<-table.element(a,m.ct.x.tab[i,jjj]) + a<-table.element(a,round(m.ct.x.tab[i,i]/sum(m.ct.x.tab[i,]),4)) + a<-table.row.end(a) + } + a<-table.row.start(a) + a<-table.element(a,'Overall',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.i.cp,4)) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.x.cp,4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/1xz671323876487.tab") + } + } > m Conditional inference tree with 3 terminal nodes Response: PRICE Inputs: SQFT, AGE, FEATS, NE, COR, TAX Number of observations: 67 1) AGE <= 6; criterion = 1, statistic = 16.161 2)* weights = 36 1) AGE > 6 3) SQFT <= 1733; criterion = 1, statistic = 22.154 4)* weights = 18 3) SQFT > 1733 5)* weights = 13 > postscript(file="/var/wessaorg/rcomp/tmp/2xiqk1323876487.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(m) > dev.off() null device 1 > postscript(file="/var/wessaorg/rcomp/tmp/3hxz51323876487.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(x[,par1] ~ as.factor(where(m)),main='Response by Terminal Node',xlab='Terminal Node',ylab='Response') > dev.off() null device 1 > if (par2 == 'none') { + forec <- predict(m) + result <- as.data.frame(cbind(x[,par1],forec,x[,par1]-forec)) + colnames(result) <- c('Actuals','Forecasts','Residuals') + print(result) + } Actuals Forecasts Residuals 1 2050 1338.9231 711.076923 2 2150 1556.5278 593.472222 3 2150 1556.5278 593.472222 4 1999 1556.5278 442.472222 5 1900 1556.5278 343.472222 6 1800 1556.5278 243.472222 7 1560 1556.5278 3.472222 8 1449 1556.5278 -107.527778 9 1375 1556.5278 -181.527778 10 1270 1338.9231 -68.923077 11 1250 1338.9231 -88.923077 12 1235 1338.9231 -103.923077 13 1170 1338.9231 -168.923077 14 1155 1338.9231 -183.923077 15 1110 930.2778 179.722222 16 1139 930.2778 208.722222 17 995 930.2778 64.722222 18 900 930.2778 -30.277778 19 960 930.2778 29.722222 20 1695 1338.9231 356.076923 21 1553 1338.9231 214.076923 22 1020 930.2778 89.722222 23 1020 930.2778 89.722222 24 850 930.2778 -80.277778 25 720 930.2778 -210.277778 26 749 930.2778 -181.277778 27 2150 1556.5278 593.472222 28 1350 1338.9231 11.076923 29 1299 1338.9231 -39.923077 30 1250 1338.9231 -88.923077 31 1239 930.2778 308.722222 32 1125 930.2778 194.722222 33 1080 1338.9231 -258.923077 34 1050 930.2778 119.722222 35 1049 1338.9231 -289.923077 36 934 930.2778 3.722222 37 875 1556.5278 -681.527778 38 805 930.2778 -125.277778 39 759 1556.5278 -797.527778 40 729 930.2778 -201.277778 41 710 930.2778 -220.277778 42 690 930.2778 -240.277778 43 1500 1556.5278 -56.527778 44 1428 1556.5278 -128.527778 45 2116 1556.5278 559.472222 46 1051 1556.5278 -505.527778 47 2250 1556.5278 693.472222 48 1400 1556.5278 -156.527778 49 1720 1556.5278 163.472222 50 1740 1556.5278 183.472222 51 1700 1556.5278 143.472222 52 1620 1556.5278 63.472222 53 1630 1556.5278 73.472222 54 1920 1556.5278 363.472222 55 1606 1556.5278 49.472222 56 1535 1556.5278 -21.527778 57 1540 1556.5278 -16.527778 58 1739 1556.5278 182.472222 59 1305 1556.5278 -251.527778 60 1415 1556.5278 -141.527778 61 1580 1556.5278 23.472222 62 1236 1556.5278 -320.527778 63 1229 1556.5278 -327.527778 64 1273 1556.5278 -283.527778 65 1165 1556.5278 -391.527778 66 1200 1556.5278 -356.527778 67 970 1556.5278 -586.527778 > if (par2 != 'none') { + print(cbind(as.factor(x[,par1]),predict(m))) + myt <- table(as.factor(x[,par1]),predict(m)) + print(myt) + } > postscript(file="/var/wessaorg/rcomp/tmp/4f0n51323876487.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > if(par2=='none') { + op <- par(mfrow=c(2,2)) + plot(density(result$Actuals),main='Kernel Density Plot of Actuals') + plot(density(result$Residuals),main='Kernel Density Plot of Residuals') + plot(result$Forecasts,result$Actuals,main='Actuals versus Predictions',xlab='Predictions',ylab='Actuals') + plot(density(result$Forecasts),main='Kernel Density Plot of Predictions') + par(op) + } > if(par2!='none') { + plot(myt,main='Confusion Matrix',xlab='Actual',ylab='Predicted') + } > dev.off() null device 1 > if (par2 == 'none') { + detcoef <- cor(result$Forecasts,result$Actuals) + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Goodness of Fit',2,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Correlation',1,TRUE) + a<-table.element(a,round(detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'R-squared',1,TRUE) + a<-table.element(a,round(detcoef*detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'RMSE',1,TRUE) + a<-table.element(a,round(sqrt(mean((result$Residuals)^2)),4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/5e3ap1323876487.tab") + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Actuals, Predictions, and Residuals',4,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'#',header=TRUE) + a<-table.element(a,'Actuals',header=TRUE) + a<-table.element(a,'Forecasts',header=TRUE) + a<-table.element(a,'Residuals',header=TRUE) + a<-table.row.end(a) + for (i in 1:length(result$Actuals)) { + a<-table.row.start(a) + a<-table.element(a,i,header=TRUE) + a<-table.element(a,result$Actuals[i]) + a<-table.element(a,result$Forecasts[i]) + a<-table.element(a,result$Residuals[i]) + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/6vj4i1323876487.tab") + } > if (par2 != 'none') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Confusion Matrix (predicted in columns / actuals in rows)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + for (i in 1:par3) { + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + } + a<-table.row.end(a) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (j in 1:par3) { + a<-table.element(a,myt[i,j]) + } + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/7wwod1323876487.tab") + } > > try(system("convert tmp/2xiqk1323876487.ps tmp/2xiqk1323876487.png",intern=TRUE)) character(0) > try(system("convert tmp/3hxz51323876487.ps tmp/3hxz51323876487.png",intern=TRUE)) character(0) > try(system("convert tmp/4f0n51323876487.ps tmp/4f0n51323876487.png",intern=TRUE)) character(0) > > > proc.time() user system elapsed 2.685 0.223 2.902