R version 2.13.0 (2011-04-13) Copyright (C) 2011 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i486-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > par9 = 'ATTLES connected' > par8 = 'ATTLES connected' > par7 = 'all' > par6 = 'prep' > par5 = 'female' > par4 = 'no' > par3 = '3' > par2 = 'none' > par1 = '7' > par9 <- 'ATTLES connected' > par8 <- 'ATTLES connected' > par7 <- 'all' > par6 <- 'prep' > par5 <- 'female' > par4 <- 'no' > par3 <- '3' > par2 <- 'none' > par1 <- '7' > #'GNU S' R Code compiled by R2WASP v. 1.2.291 () > #Author: root > #To cite this work: Wessa P., 2012, Recursive Partitioning (Regression Trees) in Information Management (v1.0.8) in Free Statistics Software (v$_version), Office for Research Development and Education, URL http://www.wessa.net/rwasp_regression_trees.wasp/ > #Source of accompanying publication: > # > library(party) Loading required package: survival Loading required package: splines Loading required package: grid Loading required package: modeltools Loading required package: stats4 Loading required package: coin Loading required package: mvtnorm Loading required package: zoo Loading required package: sandwich Loading required package: strucchange Loading required package: vcd Loading required package: MASS Loading required package: colorspace > library(Hmisc) Attaching package: 'Hmisc' The following object(s) are masked from 'package:survival': untangle.specials The following object(s) are masked from 'package:base': format.pval, round.POSIXt, trunc.POSIXt, units > par1 <- as.numeric(par1) > par3 <- as.numeric(par3) > x <- as.data.frame(read.table(file='http://www.wessa.net/download/utaut.csv',sep=',',header=T)) > x$U25 <- 6-x$U25 > if(par5 == 'female') x <- x[x$Gender==0,] > if(par5 == 'male') x <- x[x$Gender==1,] > if(par6 == 'prep') x <- x[x$Pop==1,] > if(par6 == 'bachelor') x <- x[x$Pop==0,] > if(par7 != 'all') { + x <- x[x$Year==as.numeric(par7),] + } > cAc <- with(x,cbind( A1, A2, A3, A4, A5, A6, A7, A8, A9,A10)) > cAs <- with(x,cbind(A11,A12,A13,A14,A15,A16,A17,A18,A19,A20)) > cA <- cbind(cAc,cAs) > cCa <- with(x,cbind(C1,C3,C5,C7, C9,C11,C13,C15,C17,C19,C21,C23,C25,C27,C29,C31,C33,C35,C37,C39,C41,C43,C45,C47)) > cCp <- with(x,cbind(C2,C4,C6,C8,C10,C12,C14,C16,C18,C20,C22,C24,C26,C28,C30,C32,C34,C36,C38,C40,C42,C44,C46,C48)) > cC <- cbind(cCa,cCp) > cU <- with(x,cbind(U1,U2,U3,U4,U5,U6,U7,U8,U9,U10,U11,U12,U13,U14,U15,U16,U17,U18,U19,U20,U21,U22,U23,U24,U25,U26,U27,U28,U29,U30,U31,U32,U33)) > cE <- with(x,cbind(BC,NNZFG,MRT,AFL,LPM,LPC,W,WPA)) > cX <- with(x,cbind(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16,X17,X18)) > if (par8=='ATTLES connected') x <- cAc > if (par8=='ATTLES separate') x <- cAs > if (par8=='ATTLES all') x <- cA > if (par8=='COLLES actuals') x <- cCa > if (par8=='COLLES preferred') x <- cCp > if (par8=='COLLES all') x <- cC > if (par8=='CSUQ') x <- cU > if (par8=='Learning Activities') x <- cE > if (par8=='Exam Items') x <- cX > if (par9=='ATTLES connected') y <- cAc > if (par9=='ATTLES separate') y <- cAs > if (par9=='ATTLES all') y <- cA > if (par9=='COLLES actuals') y <- cCa > if (par9=='COLLES preferred') y <- cCp > if (par9=='COLLES all') y <- cC > if (par9=='CSUQ') y <- cU > if (par9=='Learning Activities') y <- cE > if (par9=='Exam Items') y <- cX > if (par1==0) { + nr <- length(y[,1]) + nc <- length(y[1,]) + mysum <- array(0,dim=nr) + for(jjj in 1:nr) { + for(iii in 1:nc) { + mysum[jjj] = mysum[jjj] + y[jjj,iii] + } + } + y <- mysum + } else { + y <- y[,par1] + } > nx <- cbind(y,x) > colnames(nx) <- c('endo',colnames(x)) > x <- nx > par1=1 > ncol <- length(x[1,]) > for (jjj in 1:ncol) { + x <- x[!is.na(x[,jjj]),] + } > x <- as.data.frame(x) > k <- length(x[1,]) > n <- length(x[,1]) > colnames(x)[par1] [1] "endo" > x[,par1] [1] 4 4 2 3 3 5 4 4 2 4 4 3 3 4 4 4 3 4 2 5 3 4 4 4 3 4 2 4 4 4 4 4 4 4 3 5 4 [38] 5 4 3 3 4 5 3 3 2 5 3 5 4 3 3 3 4 3 4 3 4 3 4 5 3 2 4 4 3 3 3 3 3 4 4 5 5 [75] 4 4 3 4 5 3 5 2 3 4 3 3 2 3 4 4 4 3 4 4 3 5 3 4 3 4 4 3 2 3 2 4 3 4 2 4 4 [112] 2 4 4 3 3 5 4 3 4 3 4 4 5 2 4 2 2 3 4 4 5 5 5 4 3 2 4 4 4 4 4 4 4 2 4 4 4 [149] 2 4 2 4 4 4 4 5 5 2 4 5 4 4 4 5 5 4 5 4 3 3 4 5 4 5 4 4 2 4 5 4 3 3 5 3 3 [186] 4 4 2 2 4 3 4 3 4 3 4 4 5 4 4 4 2 3 2 3 4 5 4 4 3 4 4 3 5 4 4 4 3 5 3 5 3 [223] 4 4 4 4 4 4 > if (par2 == 'kmeans') { + cl <- kmeans(x[,par1], par3) + print(cl) + clm <- matrix(cbind(cl$centers,1:par3),ncol=2) + clm <- clm[sort.list(clm[,1]),] + for (i in 1:par3) { + cl$cluster[cl$cluster==clm[i,2]] <- paste('C',i,sep='') + } + cl$cluster <- as.factor(cl$cluster) + print(cl$cluster) + x[,par1] <- cl$cluster + } > if (par2 == 'quantiles') { + x[,par1] <- cut2(x[,par1],g=par3) + } > if (par2 == 'hclust') { + hc <- hclust(dist(x[,par1])^2, 'cen') + print(hc) + memb <- cutree(hc, k = par3) + dum <- c(mean(x[memb==1,par1])) + for (i in 2:par3) { + dum <- c(dum, mean(x[memb==i,par1])) + } + hcm <- matrix(cbind(dum,1:par3),ncol=2) + hcm <- hcm[sort.list(hcm[,1]),] + for (i in 1:par3) { + memb[memb==hcm[i,2]] <- paste('C',i,sep='') + } + memb <- as.factor(memb) + print(memb) + x[,par1] <- memb + } > if (par2=='equal') { + ed <- cut(as.numeric(x[,par1]),par3,labels=paste('C',1:par3,sep='')) + x[,par1] <- as.factor(ed) + } > table(x[,par1]) 2 3 4 5 25 60 110 33 > colnames(x) [1] "endo" "A1" "A2" "A3" "A4" "A5" "A6" "A7" "A8" "A9" [11] "A10" > colnames(x)[par1] [1] "endo" > x[,par1] [1] 4 4 2 3 3 5 4 4 2 4 4 3 3 4 4 4 3 4 2 5 3 4 4 4 3 4 2 4 4 4 4 4 4 4 3 5 4 [38] 5 4 3 3 4 5 3 3 2 5 3 5 4 3 3 3 4 3 4 3 4 3 4 5 3 2 4 4 3 3 3 3 3 4 4 5 5 [75] 4 4 3 4 5 3 5 2 3 4 3 3 2 3 4 4 4 3 4 4 3 5 3 4 3 4 4 3 2 3 2 4 3 4 2 4 4 [112] 2 4 4 3 3 5 4 3 4 3 4 4 5 2 4 2 2 3 4 4 5 5 5 4 3 2 4 4 4 4 4 4 4 2 4 4 4 [149] 2 4 2 4 4 4 4 5 5 2 4 5 4 4 4 5 5 4 5 4 3 3 4 5 4 5 4 4 2 4 5 4 3 3 5 3 3 [186] 4 4 2 2 4 3 4 3 4 3 4 4 5 4 4 4 2 3 2 3 4 5 4 4 3 4 4 3 5 4 4 4 3 5 3 5 3 [223] 4 4 4 4 4 4 > if (par2 == 'none') { + m <- ctree(as.formula(paste(colnames(x)[par1],' ~ .',sep='')),data = x) + } > > #Note: the /var/wessaorg/rcomp/createtable file can be downloaded at http://www.wessa.net/cretab > load(file="/var/wessaorg/rcomp/createtable") > > if (par2 != 'none') { + m <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data = x) + if (par4=='yes') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'10-Fold Cross Validation',3+2*par3,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + a<-table.element(a,'Prediction (training)',par3+1,TRUE) + a<-table.element(a,'Prediction (testing)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Actual',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + a<-table.row.end(a) + for (i in 1:10) { + ind <- sample(2, nrow(x), replace=T, prob=c(0.9,0.1)) + m.ct <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data =x[ind==1,]) + if (i==1) { + m.ct.i.pred <- predict(m.ct, newdata=x[ind==1,]) + m.ct.i.actu <- x[ind==1,par1] + m.ct.x.pred <- predict(m.ct, newdata=x[ind==2,]) + m.ct.x.actu <- x[ind==2,par1] + } else { + m.ct.i.pred <- c(m.ct.i.pred,predict(m.ct, newdata=x[ind==1,])) + m.ct.i.actu <- c(m.ct.i.actu,x[ind==1,par1]) + m.ct.x.pred <- c(m.ct.x.pred,predict(m.ct, newdata=x[ind==2,])) + m.ct.x.actu <- c(m.ct.x.actu,x[ind==2,par1]) + } + } + print(m.ct.i.tab <- table(m.ct.i.actu,m.ct.i.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.i.tab[i,i] / sum(m.ct.i.tab[i,])) + numer <- numer + m.ct.i.tab[i,i] + } + print(m.ct.i.cp <- numer / sum(m.ct.i.tab)) + print(m.ct.x.tab <- table(m.ct.x.actu,m.ct.x.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.x.tab[i,i] / sum(m.ct.x.tab[i,])) + numer <- numer + m.ct.x.tab[i,i] + } + print(m.ct.x.cp <- numer / sum(m.ct.x.tab)) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (jjj in 1:par3) a<-table.element(a,m.ct.i.tab[i,jjj]) + a<-table.element(a,round(m.ct.i.tab[i,i]/sum(m.ct.i.tab[i,]),4)) + for (jjj in 1:par3) a<-table.element(a,m.ct.x.tab[i,jjj]) + a<-table.element(a,round(m.ct.x.tab[i,i]/sum(m.ct.x.tab[i,]),4)) + a<-table.row.end(a) + } + a<-table.row.start(a) + a<-table.element(a,'Overall',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.i.cp,4)) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.x.cp,4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/1w7g51335911312.tab") + } + } > m Conditional inference tree with 4 terminal nodes Response: endo Inputs: A1, A2, A3, A4, A5, A6, A7, A8, A9, A10 Number of observations: 228 1) A7 <= 3; criterion = 1, statistic = 227 2) A7 <= 2; criterion = 1, statistic = 84 3)* weights = 25 2) A7 > 2 4)* weights = 60 1) A7 > 3 5) A7 <= 4; criterion = 1, statistic = 142 6)* weights = 110 5) A7 > 4 7)* weights = 33 > postscript(file="/var/wessaorg/rcomp/tmp/2xg2z1335911312.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(m) > dev.off() null device 1 > postscript(file="/var/wessaorg/rcomp/tmp/3x3bx1335911312.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(x[,par1] ~ as.factor(where(m)),main='Response by Terminal Node',xlab='Terminal Node',ylab='Response') > dev.off() null device 1 > if (par2 == 'none') { + forec <- predict(m) + result <- as.data.frame(cbind(x[,par1],forec,x[,par1]-forec)) + colnames(result) <- c('Actuals','Forecasts','Residuals') + print(result) + } Actuals Forecasts Residuals 1 4 4 0 2 4 4 0 3 2 2 0 4 3 3 0 5 3 3 0 6 5 5 0 7 4 4 0 8 4 4 0 9 2 2 0 10 4 4 0 11 4 4 0 12 3 3 0 13 3 3 0 14 4 4 0 15 4 4 0 16 4 4 0 17 3 3 0 18 4 4 0 19 2 2 0 20 5 5 0 21 3 3 0 22 4 4 0 23 4 4 0 24 4 4 0 25 3 3 0 26 4 4 0 27 2 2 0 28 4 4 0 29 4 4 0 30 4 4 0 31 4 4 0 32 4 4 0 33 4 4 0 34 4 4 0 35 3 3 0 36 5 5 0 37 4 4 0 38 5 5 0 39 4 4 0 40 3 3 0 41 3 3 0 42 4 4 0 43 5 5 0 44 3 3 0 45 3 3 0 46 2 2 0 47 5 5 0 48 3 3 0 49 5 5 0 50 4 4 0 51 3 3 0 52 3 3 0 53 3 3 0 54 4 4 0 55 3 3 0 56 4 4 0 57 3 3 0 58 4 4 0 59 3 3 0 60 4 4 0 61 5 5 0 62 3 3 0 63 2 2 0 64 4 4 0 65 4 4 0 66 3 3 0 67 3 3 0 68 3 3 0 69 3 3 0 70 3 3 0 71 4 4 0 72 4 4 0 73 5 5 0 74 5 5 0 75 4 4 0 76 4 4 0 77 3 3 0 78 4 4 0 79 5 5 0 80 3 3 0 81 5 5 0 82 2 2 0 83 3 3 0 84 4 4 0 85 3 3 0 86 3 3 0 87 2 2 0 88 3 3 0 89 4 4 0 90 4 4 0 91 4 4 0 92 3 3 0 93 4 4 0 94 4 4 0 95 3 3 0 96 5 5 0 97 3 3 0 98 4 4 0 99 3 3 0 100 4 4 0 101 4 4 0 102 3 3 0 103 2 2 0 104 3 3 0 105 2 2 0 106 4 4 0 107 3 3 0 108 4 4 0 109 2 2 0 110 4 4 0 111 4 4 0 112 2 2 0 113 4 4 0 114 4 4 0 115 3 3 0 116 3 3 0 117 5 5 0 118 4 4 0 119 3 3 0 120 4 4 0 121 3 3 0 122 4 4 0 123 4 4 0 124 5 5 0 125 2 2 0 126 4 4 0 127 2 2 0 128 2 2 0 129 3 3 0 130 4 4 0 131 4 4 0 132 5 5 0 133 5 5 0 134 5 5 0 135 4 4 0 136 3 3 0 137 2 2 0 138 4 4 0 139 4 4 0 140 4 4 0 141 4 4 0 142 4 4 0 143 4 4 0 144 4 4 0 145 2 2 0 146 4 4 0 147 4 4 0 148 4 4 0 149 2 2 0 150 4 4 0 151 2 2 0 152 4 4 0 153 4 4 0 154 4 4 0 155 4 4 0 156 5 5 0 157 5 5 0 158 2 2 0 159 4 4 0 160 5 5 0 161 4 4 0 162 4 4 0 163 4 4 0 164 5 5 0 165 5 5 0 166 4 4 0 167 5 5 0 168 4 4 0 169 3 3 0 170 3 3 0 171 4 4 0 172 5 5 0 173 4 4 0 174 5 5 0 175 4 4 0 176 4 4 0 177 2 2 0 178 4 4 0 179 5 5 0 180 4 4 0 181 3 3 0 182 3 3 0 183 5 5 0 184 3 3 0 185 3 3 0 186 4 4 0 187 4 4 0 188 2 2 0 189 2 2 0 190 4 4 0 191 3 3 0 192 4 4 0 193 3 3 0 194 4 4 0 195 3 3 0 196 4 4 0 197 4 4 0 198 5 5 0 199 4 4 0 200 4 4 0 201 4 4 0 202 2 2 0 203 3 3 0 204 2 2 0 205 3 3 0 206 4 4 0 207 5 5 0 208 4 4 0 209 4 4 0 210 3 3 0 211 4 4 0 212 4 4 0 213 3 3 0 214 5 5 0 215 4 4 0 216 4 4 0 217 4 4 0 218 3 3 0 219 5 5 0 220 3 3 0 221 5 5 0 222 3 3 0 223 4 4 0 224 4 4 0 225 4 4 0 226 4 4 0 227 4 4 0 228 4 4 0 > if (par2 != 'none') { + print(cbind(as.factor(x[,par1]),predict(m))) + myt <- table(as.factor(x[,par1]),predict(m)) + print(myt) + } > postscript(file="/var/wessaorg/rcomp/tmp/4qkpz1335911312.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > if(par2=='none') { + op <- par(mfrow=c(2,2)) + plot(density(result$Actuals),main='Kernel Density Plot of Actuals') + plot(density(result$Residuals),main='Kernel Density Plot of Residuals') + plot(result$Forecasts,result$Actuals,main='Actuals versus Predictions',xlab='Predictions',ylab='Actuals') + plot(density(result$Forecasts),main='Kernel Density Plot of Predictions') + par(op) + } > if(par2!='none') { + plot(myt,main='Confusion Matrix',xlab='Actual',ylab='Predicted') + } > dev.off() null device 1 > if (par2 == 'none') { + detcoef <- cor(result$Forecasts,result$Actuals) + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Goodness of Fit',2,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Correlation',1,TRUE) + a<-table.element(a,round(detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'R-squared',1,TRUE) + a<-table.element(a,round(detcoef*detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'RMSE',1,TRUE) + a<-table.element(a,round(sqrt(mean((result$Residuals)^2)),4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/5qs8s1335911312.tab") + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Actuals, Predictions, and Residuals',4,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'#',header=TRUE) + a<-table.element(a,'Actuals',header=TRUE) + a<-table.element(a,'Forecasts',header=TRUE) + a<-table.element(a,'Residuals',header=TRUE) + a<-table.row.end(a) + for (i in 1:length(result$Actuals)) { + a<-table.row.start(a) + a<-table.element(a,i,header=TRUE) + a<-table.element(a,result$Actuals[i]) + a<-table.element(a,result$Forecasts[i]) + a<-table.element(a,result$Residuals[i]) + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/6l9t71335911312.tab") + } > if (par2 != 'none') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Confusion Matrix (predicted in columns / actuals in rows)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + for (i in 1:par3) { + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + } + a<-table.row.end(a) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (j in 1:par3) { + a<-table.element(a,myt[i,j]) + } + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/7m2xa1335911312.tab") + } > > try(system("convert tmp/2xg2z1335911312.ps tmp/2xg2z1335911312.png",intern=TRUE)) character(0) > try(system("convert tmp/3x3bx1335911312.ps tmp/3x3bx1335911312.png",intern=TRUE)) character(0) > try(system("convert tmp/4qkpz1335911312.ps tmp/4qkpz1335911312.png",intern=TRUE)) character(0) > > > proc.time() user system elapsed 3.761 0.324 4.093