7.2 Coding guide

7.2.1 Expanding the Titanic frequency table to one row per passenger

library(e1071)
# data.table() is called below, so attach its package here explicitly instead
# of relying on it having been attached somewhere else.
library(data.table)

# Load the built-in Titanic contingency table and convert it to a data.table.
# The conversion yields a count column `N` (used below) next to the
# classification columns.
data("Titanic")
titan <- data.table(Titanic)
rm(Titanic)  # drop the copy that data() placed in the workspace

# Expand the aggregated counts: repeat each row index N times so that every
# counted case gets its own row; rows with N == 0 disappear.
# seq_len() is used rather than seq(1, n, 1) so that a zero-row table gives an
# empty index instead of an error.
repr <- rep.int(seq_len(nrow(titan)), titan$N)
titan <- titan[repr, ]

# The count column is redundant once every case has its own row.
titan$N <- NULL

7.2.2 Ctree procedure

library(partykit)
## Loading required package: grid
## Loading required package: libcoin
## Loading required package: mvtnorm
# Cast the three predictors and the outcome to factors before fitting.
titan$Class    <- as.factor(titan$Class)
titan$Sex      <- as.factor(titan$Sex)
titan$Age      <- as.factor(titan$Age)
titan$Survived <- as.factor(titan$Survived)

# Fit a conditional inference tree for Survived and draw the full tree.
tree <- ctree(formula = Survived ~ Class + Sex + Age, data = titan)
plot(tree)

# Draw the tree again with the subtrees below nodes 3 and 10 pruned away.
plot(nodeprune(tree, ids = c(3, 10)))

# Predictions on the data the tree was fitted to, followed by the share of
# rows where the prediction equals the observed outcome.
pred2 <- predict(tree)
mean(titan$Survived == pred2)
## [1] 0.7882781
# Cross-tabulation of predicted (rows) against observed (columns) outcome.
table(pred2, titan$Survived)
##      
## pred2   No  Yes
##   No  1470  446
##   Yes   20  265

7.2.3 Trees for continuous variables

# Smoking indicator as a factor: 1 where cigs > 0, otherwise 0.
# NOTE(review): `bwght` is not created in this section; it is assumed to be a
# data frame already available in the workspace -- confirm.
bwght$smokes <- as.factor(as.numeric(bwght$cigs > 0))

# Tree for the `bwght` column on cigs, faminc, male and white.
plot(ctree(formula = bwght ~ cigs + faminc + male + white, data = bwght))

# Tree for the cigs column on faminc and white.
plot(ctree(formula = cigs ~ faminc + white, data = bwght))

# Same predictors, but with the smokes factor as the response.
plot(ctree(formula = smokes ~ faminc + white, data = bwght))

# Tree for wage on educ, exper and tenure.
# NOTE(review): `wage1` is likewise assumed to exist already -- confirm.
plot(ctree(formula = wage ~ educ + exper + tenure, data = wage1))

7.2.4 GLM trees

# No `|` in the formula: the echoed model formula is
# `wage ~ 1 | educ + exper + tenure`, i.e. all three variables are used for
# partitioning and each terminal node fits an intercept only.
glmtree(formula = wage ~ educ + exper + tenure, data = wage1)
## Generalized linear model tree (family: gaussian)
## 
## Model formula:
## wage ~ 1 | educ + exper + tenure
## 
## Fitted party:
## [1] root
## |   [2] educ <= 15
## |   |   [3] tenure <= 3
## |   |   |   [4] exper <= 7
## |   |   |   |   [5] educ <= 11: n = 37
## |   |   |   |       (Intercept) 
## |   |   |   |          2.932432 
## |   |   |   |   [6] educ > 11: n = 74
## |   |   |   |       (Intercept) 
## |   |   |   |          3.996351 
## |   |   |   [7] exper > 7: n = 145
## |   |   |       (Intercept) 
## |   |   |          4.762759 
## |   |   [8] tenure > 3
## |   |   |   [9] educ <= 10: n = 34
## |   |   |       (Intercept) 
## |   |   |          4.752941 
## |   |   |   [10] educ > 10
## |   |   |   |   [11] tenure <= 14: n = 106
## |   |   |   |       (Intercept) 
## |   |   |   |          6.419811 
## |   |   |   |   [12] tenure > 14: n = 31
## |   |   |   |       (Intercept) 
## |   |   |   |          9.096774 
## |   [13] educ > 15
## |   |   [14] tenure <= 6: n = 74
## |   |       (Intercept) 
## |   |          7.614865 
## |   |   [15] tenure > 6: n = 25
## |   |       (Intercept) 
## |   |            13.028 
## 
## Number of inner nodes:    7
## Number of terminal nodes: 8
## Number of parameters per node: 1
## Objective function (negative log-likelihood): 1198.485
# Regress wage on exper and tenure within each node; partition on educ only.
glmtree(formula = wage ~ exper + tenure | educ, data = wage1)
## Generalized linear model tree (family: gaussian)
## 
## Model formula:
## wage ~ exper + tenure | educ
## 
## Fitted party:
## [1] root
## |   [2] educ <= 15
## |   |   [3] educ <= 11: n = 116
## |   |       (Intercept)       exper      tenure 
## |   |       3.446722837 0.001429165 0.098414049 
## |   |   [4] educ > 11: n = 311
## |   |       (Intercept)       exper      tenure 
## |   |       4.602945302 0.008918606 0.177646112 
## |   [5] educ > 15
## |   |   [6] educ <= 16: n = 68
## |   |       (Intercept)       exper      tenure 
## |   |        6.46734370  0.05487934  0.21869650 
## |   |   [7] educ > 16: n = 31
## |   |       (Intercept)       exper      tenure 
## |   |         7.3390757   0.1250740   0.3436781 
## 
## Number of inner nodes:    3
## Number of terminal nodes: 4
## Number of parameters per node: 3
## Objective function (negative log-likelihood): 1282.396
# Regress wage on tenure alone within each node; partition on educ.
glmtree(formula = wage ~ tenure | educ, data = wage1)
## Generalized linear model tree (family: gaussian)
## 
## Model formula:
## wage ~ tenure | educ
## 
## Fitted party:
## [1] root
## |   [2] educ <= 15
## |   |   [3] educ <= 11: n = 116
## |   |       (Intercept)      tenure 
## |   |        3.46782663  0.09990147 
## |   |   [4] educ > 11: n = 311
## |   |       (Intercept)      tenure 
## |   |         4.7189951   0.1850875 
## |   [5] educ > 15
## |   |   [6] educ <= 16: n = 68
## |   |       (Intercept)      tenure 
## |   |         6.8591275   0.2857908 
## |   |   [7] educ > 16: n = 31
## |   |       (Intercept)      tenure 
## |   |         8.2314959   0.4412689 
## 
## Number of inner nodes:    3
## Number of terminal nodes: 4
## Number of parameters per node: 2
## Objective function (negative log-likelihood): 1283.494
# Regress wage on educ, exper and tenure within each node; candidate
# partitioning variables are nonwhite, female and married.
glmtree(
  formula = wage ~ educ + exper + tenure | nonwhite + female + married,
  data = wage1
)
## Generalized linear model tree (family: gaussian)
## 
## Model formula:
## wage ~ educ + exper + tenure | nonwhite + female + married
## 
## Fitted party:
## [1] root
## |   [2] female <= 0
## |   |   [3] married <= 0: n = 86
## |   |       (Intercept)        educ       exper      tenure 
## |   |       -1.19319465  0.43687735  0.07595967  0.02451867 
## |   |   [4] married > 0: n = 188
## |   |       (Intercept)        educ       exper      tenure 
## |   |       -2.92454292  0.69539837  0.02934353  0.16956272 
## |   [5] female > 0
## |   |   [6] married <= 0: n = 120
## |   |        (Intercept)         educ        exper       tenure 
## |   |       -2.858646997  0.561990263  0.009881087  0.191828643 
## |   |   [7] married > 0: n = 132
## |   |         (Intercept)          educ         exper        tenure 
## |   |        0.5485905116  0.3253589998 -0.0005633994 -0.0036062743 
## 
## Number of inner nodes:    3
## Number of terminal nodes: 4
## Number of parameters per node: 4
## Objective function (negative log-likelihood): 1253.491