R version 2.13.0 (2011-04-13) Copyright (C) 2011 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i486-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > par9 = 'CSUQ' > par8 = 'CSUQ' > par7 = 'all' > par6 = 'bachelor' > par5 = 'male' > par4 = 'no' > par3 = '3' > par2 = 'none' > par1 = '0' > par9 <- 'CSUQ' > par8 <- 'CSUQ' > par7 <- 'all' > par6 <- 'bachelor' > par5 <- 'male' > par4 <- 'no' > par3 <- '3' > par2 <- 'none' > par1 <- '0' > #'GNU S' R Code compiled by R2WASP v. 1.2.291 () > #Author: root > #To cite this work: Wessa P., 2012, Recursive Partitioning (Regression Trees) in Information Management (v1.0.8) in Free Statistics Software (v$_version), Office for Research Development and Education, URL http://www.wessa.net/rwasp_regression_trees.wasp/ > #Source of accompanying publication: > # > library(party) Loading required package: survival Loading required package: splines Loading required package: grid Loading required package: modeltools Loading required package: stats4 Loading required package: coin Loading required package: mvtnorm Loading required package: zoo Loading required package: sandwich Loading required package: strucchange Loading required package: vcd Loading required package: MASS Loading required package: colorspace > library(Hmisc) Attaching package: 'Hmisc' The following object(s) are masked from 'package:survival': untangle.specials The following object(s) are masked from 'package:base': format.pval, round.POSIXt, trunc.POSIXt, units > par1 <- as.numeric(par1) > par3 <- as.numeric(par3) > x <- as.data.frame(read.table(file='http://www.wessa.net/download/utaut.csv',sep=',',header=T)) > x$U25 <- 6-x$U25 > if(par5 == 'female') x <- x[x$Gender==0,] > if(par5 == 'male') x <- x[x$Gender==1,] > if(par6 == 'prep') x <- x[x$Pop==1,] > if(par6 == 'bachelor') x <- x[x$Pop==0,] > if(par7 != 'all') { + x <- x[x$Year==as.numeric(par7),] + } > cAc <- with(x,cbind( A1, A2, A3, A4, A5, A6, A7, A8, A9,A10)) > cAs <- with(x,cbind(A11,A12,A13,A14,A15,A16,A17,A18,A19,A20)) > cA <- cbind(cAc,cAs) > cCa <- with(x,cbind(C1,C3,C5,C7, C9,C11,C13,C15,C17,C19,C21,C23,C25,C27,C29,C31,C33,C35,C37,C39,C41,C43,C45,C47)) > cCp <- with(x,cbind(C2,C4,C6,C8,C10,C12,C14,C16,C18,C20,C22,C24,C26,C28,C30,C32,C34,C36,C38,C40,C42,C44,C46,C48)) > cC <- cbind(cCa,cCp) > cU <- with(x,cbind(U1,U2,U3,U4,U5,U6,U7,U8,U9,U10,U11,U12,U13,U14,U15,U16,U17,U18,U19,U20,U21,U22,U23,U24,U25,U26,U27,U28,U29,U30,U31,U32,U33)) > cE <- with(x,cbind(BC,NNZFG,MRT,AFL,LPM,LPC,W,WPA)) > cX <- with(x,cbind(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16,X17,X18)) > if (par8=='ATTLES connected') x <- cAc > if (par8=='ATTLES separate') x <- cAs > if (par8=='ATTLES all') x <- cA > if (par8=='COLLES actuals') x <- cCa > if (par8=='COLLES preferred') x <- cCp > if (par8=='COLLES all') x <- cC > if (par8=='CSUQ') x <- cU > if (par8=='Learning Activities') x <- cE > if (par8=='Exam Items') x <- cX > if (par9=='ATTLES connected') y <- cAc > if (par9=='ATTLES separate') y <- cAs > if (par9=='ATTLES all') y <- cA > if (par9=='COLLES actuals') y <- cCa > if (par9=='COLLES preferred') y <- cCp > if (par9=='COLLES all') y <- cC > if (par9=='CSUQ') y <- cU > if (par9=='Learning Activities') y <- cE > if (par9=='Exam Items') y <- cX > if (par1==0) { + nr <- length(y[,1]) + nc <- length(y[1,]) + mysum <- array(0,dim=nr) + for(jjj in 1:nr) { + for(iii in 1:nc) { + mysum[jjj] = mysum[jjj] + y[jjj,iii] + } + } + y <- mysum + } else { + y <- y[,par1] + } > nx <- cbind(y,x) > colnames(nx) <- c('endo',colnames(x)) > x <- nx > par1=1 > ncol <- length(x[1,]) > for (jjj in 1:ncol) { + x <- x[!is.na(x[,jjj]),] + } > x <- as.data.frame(x) > k <- length(x[1,]) > n <- length(x[,1]) > colnames(x)[par1] [1] "endo" > x[,par1] [1] 110 107 93 106 121 98 96 110 134 149 118 95 107 121 61 118 109 124 [19] 143 112 87 130 121 120 111 100 126 126 123 76 81 92 38 141 120 124 [37] 129 111 84 123 124 97 132 110 127 136 87 87 94 138 90 71 80 122 [55] 126 107 131 125 144 128 127 136 120 102 119 87 95 118 136 105 123 104 [73] 121 113 94 133 107 80 112 66 126 133 140 133 130 92 125 116 110 117 [91] 122 130 128 142 133 89 117 124 144 136 94 140 112 141 119 114 142 149 [109] 91 130 132 > if (par2 == 'kmeans') { + cl <- kmeans(x[,par1], par3) + print(cl) + clm <- matrix(cbind(cl$centers,1:par3),ncol=2) + clm <- clm[sort.list(clm[,1]),] + for (i in 1:par3) { + cl$cluster[cl$cluster==clm[i,2]] <- paste('C',i,sep='') + } + cl$cluster <- as.factor(cl$cluster) + print(cl$cluster) + x[,par1] <- cl$cluster + } > if (par2 == 'quantiles') { + x[,par1] <- cut2(x[,par1],g=par3) + } > if (par2 == 'hclust') { + hc <- hclust(dist(x[,par1])^2, 'cen') + print(hc) + memb <- cutree(hc, k = par3) + dum <- c(mean(x[memb==1,par1])) + for (i in 2:par3) { + dum <- c(dum, mean(x[memb==i,par1])) + } + hcm <- matrix(cbind(dum,1:par3),ncol=2) + hcm <- hcm[sort.list(hcm[,1]),] + for (i in 1:par3) { + memb[memb==hcm[i,2]] <- paste('C',i,sep='') + } + memb <- as.factor(memb) + print(memb) + x[,par1] <- memb + } > if (par2=='equal') { + ed <- cut(as.numeric(x[,par1]),par3,labels=paste('C',1:par3,sep='')) + x[,par1] <- as.factor(ed) + } > table(x[,par1]) 38 61 66 71 76 80 81 84 87 89 90 91 92 93 94 95 96 97 98 100 1 1 1 1 1 2 1 1 4 1 1 1 2 1 3 2 1 1 1 1 102 104 105 106 107 109 110 111 112 113 114 116 117 118 119 120 121 122 123 124 1 1 1 1 4 1 4 2 3 1 1 1 2 3 2 3 4 2 3 4 125 126 127 128 129 130 131 132 133 134 136 138 140 141 142 143 144 149 2 4 2 2 1 4 1 2 4 1 4 1 2 2 2 1 2 2 > colnames(x) [1] "endo" "U1" "U2" "U3" "U4" "U5" "U6" "U7" "U8" "U9" [11] "U10" "U11" "U12" "U13" "U14" "U15" "U16" "U17" "U18" "U19" [21] "U20" "U21" "U22" "U23" "U24" "U25" "U26" "U27" "U28" "U29" [31] "U30" "U31" "U32" "U33" > colnames(x)[par1] [1] "endo" > x[,par1] [1] 110 107 93 106 121 98 96 110 134 149 118 95 107 121 61 118 109 124 [19] 143 112 87 130 121 120 111 100 126 126 123 76 81 92 38 141 120 124 [37] 129 111 84 123 124 97 132 110 127 136 87 87 94 138 90 71 80 122 [55] 126 107 131 125 144 128 127 136 120 102 119 87 95 118 136 105 123 104 [73] 121 113 94 133 107 80 112 66 126 133 140 133 130 92 125 116 110 117 [91] 122 130 128 142 133 89 117 124 144 136 94 140 112 141 119 114 142 149 [109] 91 130 132 > if (par2 == 'none') { + m <- ctree(as.formula(paste(colnames(x)[par1],' ~ .',sep='')),data = x) + } > > #Note: the /var/wessaorg/rcomp/createtable file can be downloaded at http://www.wessa.net/cretab > load(file="/var/wessaorg/rcomp/createtable") > > if (par2 != 'none') { + m <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data = x) + if (par4=='yes') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'10-Fold Cross Validation',3+2*par3,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + a<-table.element(a,'Prediction (training)',par3+1,TRUE) + a<-table.element(a,'Prediction (testing)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Actual',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + a<-table.row.end(a) + for (i in 1:10) { + ind <- sample(2, nrow(x), replace=T, prob=c(0.9,0.1)) + m.ct <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data =x[ind==1,]) + if (i==1) { + m.ct.i.pred <- predict(m.ct, newdata=x[ind==1,]) + m.ct.i.actu <- x[ind==1,par1] + m.ct.x.pred <- predict(m.ct, newdata=x[ind==2,]) + m.ct.x.actu <- x[ind==2,par1] + } else { + m.ct.i.pred <- c(m.ct.i.pred,predict(m.ct, newdata=x[ind==1,])) + m.ct.i.actu <- c(m.ct.i.actu,x[ind==1,par1]) + m.ct.x.pred <- c(m.ct.x.pred,predict(m.ct, newdata=x[ind==2,])) + m.ct.x.actu <- c(m.ct.x.actu,x[ind==2,par1]) + } + } + print(m.ct.i.tab <- table(m.ct.i.actu,m.ct.i.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.i.tab[i,i] / sum(m.ct.i.tab[i,])) + numer <- numer + m.ct.i.tab[i,i] + } + print(m.ct.i.cp <- numer / sum(m.ct.i.tab)) + print(m.ct.x.tab <- table(m.ct.x.actu,m.ct.x.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.x.tab[i,i] / sum(m.ct.x.tab[i,])) + numer <- numer + m.ct.x.tab[i,i] + } + print(m.ct.x.cp <- numer / sum(m.ct.x.tab)) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (jjj in 1:par3) a<-table.element(a,m.ct.i.tab[i,jjj]) + a<-table.element(a,round(m.ct.i.tab[i,i]/sum(m.ct.i.tab[i,]),4)) + for (jjj in 1:par3) a<-table.element(a,m.ct.x.tab[i,jjj]) + a<-table.element(a,round(m.ct.x.tab[i,i]/sum(m.ct.x.tab[i,]),4)) + a<-table.row.end(a) + } + a<-table.row.start(a) + a<-table.element(a,'Overall',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.i.cp,4)) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.x.cp,4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/14jeu1335888685.tab") + } + } > m Conditional inference tree with 8 terminal nodes Response: endo Inputs: U1, U2, U3, U4, U5, U6, U7, U8, U9, U10, U11, U12, U13, U14, U15, U16, U17, U18, U19, U20, U21, U22, U23, U24, U25, U26, U27, U28, U29, U30, U31, U32, U33 Number of observations: 111 1) U20 <= 3; criterion = 1, statistic = 79.88 2) U31 <= 3; criterion = 1, statistic = 19.132 3) U5 <= 2; criterion = 0.994, statistic = 14.073 4)* weights = 10 3) U5 > 2 5)* weights = 15 2) U31 > 3 6)* weights = 10 1) U20 > 3 7) U22 <= 4; criterion = 1, statistic = 36.768 8) U30 <= 3; criterion = 1, statistic = 26.133 9)* weights = 13 8) U30 > 3 10) U4 <= 4; criterion = 0.99, statistic = 13.118 11)* weights = 29 10) U4 > 4 12)* weights = 9 7) U22 > 4 13) U11 <= 3; criterion = 0.958, statistic = 10.353 14)* weights = 13 13) U11 > 3 15)* weights = 12 > postscript(file="/var/wessaorg/rcomp/tmp/20lia1335888685.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(m) > dev.off() null device 1 > postscript(file="/var/wessaorg/rcomp/tmp/3bpa91335888685.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(x[,par1] ~ as.factor(where(m)),main='Response by Terminal Node',xlab='Terminal Node',ylab='Response') > dev.off() null device 1 > if (par2 == 'none') { + forec <- predict(m) + result <- as.data.frame(cbind(x[,par1],forec,x[,par1]-forec)) + colnames(result) <- c('Actuals','Forecasts','Residuals') + print(result) + } Actuals Forecasts Residuals 1 110 109.76923 0.23076923 2 107 109.76923 -2.76923077 3 93 106.10000 -13.10000000 4 106 106.10000 -0.10000000 5 121 129.46154 -8.46153846 6 98 106.10000 -8.10000000 7 96 109.76923 -13.76923077 8 110 120.96552 -10.96551724 9 134 120.96552 13.03448276 10 149 140.83333 8.16666667 11 118 120.96552 -2.96551724 12 95 109.76923 -14.76923077 13 107 109.76923 -2.76923077 14 121 120.96552 0.03448276 15 61 73.20000 -12.20000000 16 118 120.96552 -2.96551724 17 109 120.96552 -11.96551724 18 124 129.46154 -5.46153846 19 143 131.33333 11.66666667 20 112 109.76923 2.23076923 21 87 73.20000 13.80000000 22 130 131.33333 -1.33333333 23 121 120.96552 0.03448276 24 120 120.96552 -0.96551724 25 111 109.76923 1.23076923 26 100 94.46667 5.53333333 27 126 120.96552 5.03448276 28 126 120.96552 5.03448276 29 123 129.46154 -6.46153846 30 76 73.20000 2.80000000 31 81 73.20000 7.80000000 32 92 94.46667 -2.46666667 33 38 73.20000 -35.20000000 34 141 140.83333 0.16666667 35 120 106.10000 13.90000000 36 124 129.46154 -5.46153846 37 129 140.83333 -11.83333333 38 111 109.76923 1.23076923 39 84 94.46667 -10.46666667 40 123 120.96552 2.03448276 41 124 120.96552 3.03448276 42 97 94.46667 2.53333333 43 132 131.33333 0.66666667 44 110 120.96552 -10.96551724 45 127 129.46154 -2.46153846 46 136 140.83333 -4.83333333 47 87 94.46667 -7.46666667 48 87 94.46667 -7.46666667 49 94 94.46667 -0.46666667 50 138 140.83333 -2.83333333 51 90 106.10000 -16.10000000 52 71 73.20000 -2.20000000 53 80 73.20000 6.80000000 54 122 120.96552 1.03448276 55 126 120.96552 5.03448276 56 107 94.46667 12.53333333 57 131 129.46154 1.53846154 58 125 120.96552 4.03448276 59 144 140.83333 3.16666667 60 128 131.33333 -3.33333333 61 127 120.96552 6.03448276 62 136 129.46154 6.53846154 63 120 120.96552 -0.96551724 64 102 109.76923 -7.76923077 65 119 120.96552 -1.96551724 66 87 94.46667 -7.46666667 67 95 106.10000 -11.10000000 68 118 120.96552 -2.96551724 69 136 140.83333 -4.83333333 70 105 106.10000 -1.10000000 71 123 131.33333 -8.33333333 72 104 94.46667 9.53333333 73 121 129.46154 -8.46153846 74 113 120.96552 -7.96551724 75 94 94.46667 -0.46666667 76 133 129.46154 3.53846154 77 107 106.10000 0.90000000 78 80 73.20000 6.80000000 79 112 109.76923 2.23076923 80 66 73.20000 -7.20000000 81 126 120.96552 5.03448276 82 133 131.33333 1.66666667 83 140 129.46154 10.53846154 84 133 106.10000 26.90000000 85 130 129.46154 0.53846154 86 92 73.20000 18.80000000 87 125 120.96552 4.03448276 88 116 109.76923 6.23076923 89 110 94.46667 15.53333333 90 117 120.96552 -3.96551724 91 122 120.96552 1.03448276 92 130 131.33333 -1.33333333 93 128 120.96552 7.03448276 94 142 140.83333 1.16666667 95 133 131.33333 1.66666667 96 89 94.46667 -5.46666667 97 117 120.96552 -3.96551724 98 124 120.96552 3.03448276 99 144 140.83333 3.16666667 100 136 109.76923 26.23076923 101 94 94.46667 -0.46666667 102 140 140.83333 -0.83333333 103 112 109.76923 2.23076923 104 141 129.46154 11.53846154 105 119 120.96552 -1.96551724 106 114 106.10000 7.90000000 107 142 140.83333 1.16666667 108 149 140.83333 8.16666667 109 91 94.46667 -3.46666667 110 130 131.33333 -1.33333333 111 132 129.46154 2.53846154 > if (par2 != 'none') { + print(cbind(as.factor(x[,par1]),predict(m))) + myt <- table(as.factor(x[,par1]),predict(m)) + print(myt) + } > postscript(file="/var/wessaorg/rcomp/tmp/4pa8i1335888685.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > if(par2=='none') { + op <- par(mfrow=c(2,2)) + plot(density(result$Actuals),main='Kernel Density Plot of Actuals') + plot(density(result$Residuals),main='Kernel Density Plot of Residuals') + plot(result$Forecasts,result$Actuals,main='Actuals versus Predictions',xlab='Predictions',ylab='Actuals') + plot(density(result$Forecasts),main='Kernel Density Plot of Predictions') + par(op) + } > if(par2!='none') { + plot(myt,main='Confusion Matrix',xlab='Actual',ylab='Predicted') + } > dev.off() null device 1 > if (par2 == 'none') { + detcoef <- cor(result$Forecasts,result$Actuals) + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Goodness of Fit',2,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Correlation',1,TRUE) + a<-table.element(a,round(detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'R-squared',1,TRUE) + a<-table.element(a,round(detcoef*detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'RMSE',1,TRUE) + a<-table.element(a,round(sqrt(mean((result$Residuals)^2)),4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/516p51335888685.tab") + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Actuals, Predictions, and Residuals',4,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'#',header=TRUE) + a<-table.element(a,'Actuals',header=TRUE) + a<-table.element(a,'Forecasts',header=TRUE) + a<-table.element(a,'Residuals',header=TRUE) + a<-table.row.end(a) + for (i in 1:length(result$Actuals)) { + a<-table.row.start(a) + a<-table.element(a,i,header=TRUE) + a<-table.element(a,result$Actuals[i]) + a<-table.element(a,result$Forecasts[i]) + a<-table.element(a,result$Residuals[i]) + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/61ycz1335888685.tab") + } > if (par2 != 'none') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Confusion Matrix (predicted in columns / actuals in rows)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + for (i in 1:par3) { + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + } + a<-table.row.end(a) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (j in 1:par3) { + a<-table.element(a,myt[i,j]) + } + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/wessaorg/rcomp/tmp/7saht1335888685.tab") + } > > try(system("convert tmp/20lia1335888685.ps tmp/20lia1335888685.png",intern=TRUE)) character(0) > try(system("convert tmp/3bpa91335888685.ps tmp/3bpa91335888685.png",intern=TRUE)) character(0) > try(system("convert tmp/4pa8i1335888685.ps tmp/4pa8i1335888685.png",intern=TRUE)) character(0) > > > proc.time() user system elapsed 4.298 0.428 4.726