library(rpart);

# Load the training set: no header row, three columns named x1, x2 and y.
# FALSE is spelled out because F is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
train <- read.table("loans.dat", header = FALSE,
                    col.names = c("x1", "x2", "y"))

# Fit a classification tree of y on x1 and x2. minsplit is forwarded to
# rpart's control settings (minimum node size for which a split is attempted).
fit <- rpart(y ~ x1 + x2, data = train, method = "class", minsplit = 15)

# Show the fitted tree and the control parameters that were actually used.
print(fit)
print(fit$control)

# Draw the fitted tree into cart11.eps.
# psopts.r is a project-local script; presumably it sets PostScript device
# options -- confirm against that file.
source("psopts.r")
postscript(file = "cart11.eps")

# Zero margins on all four sides, then the tree skeleton plus its labels.
par(mar = c(0, 0, 0, 0))
plot(fit)
text(fit)
# With no arguments par() only queries the current settings; it changes nothing.
par()

# Close the device so the file is flushed to disk.
dev.off()

# Build a (size + 1) x (size + 1) lattice of points, label every point with
# the hard-coded tree rule, and flag the lattice points lying on the boundary
# between the two labels. Produces: size, grid, gx1, gx2, dflt, edge.
size <- 100
n_side <- size + 1

# Row (n_side * j + i + 1) of grid holds the point (i, j), so the first
# coordinate varies fastest. Dividing by (size + 1) puts every coordinate in
# [0, size / (size + 1)].
grid <- cbind(rep(0:size, times = n_side),
              rep(0:size, each = n_side)) / n_side

gx1 <- grid[, 1]
gx2 <- grid[, 2]

# Hard-coded decision rule; the thresholds were presumably read off the
# printed rpart tree -- confirm they still match `fit` if the data changes.
dflt <- (gx1 < 0.3440913) |
  ((gx1 >= 0.3440913) & (gx2 < 0.2215745)) |
  ((gx1 >= 0.3440913) & (gx2 >= 0.2215745) &
     (gx1 < 0.4202267) & (gx2 < 0.524562))

# Reshape the labels so that element [i + 1, j + 1] is lattice point (i, j);
# this matches the row numbering of grid because R fills matrices by column.
dflt_mat <- matrix(dflt, nrow = n_side)

# A point with i >= 1 and j >= 1 is an edge point when its label differs from
# the neighbour at (i, j - 1) or from the neighbour at (i - 1, j). Points with
# i == 0 or j == 0 are never flagged.
core   <- dflt_mat[-1, -1, drop = FALSE]
prev_j <- dflt_mat[-1, -n_side, drop = FALSE]
prev_i <- dflt_mat[-n_side, -1, drop = FALSE]

edge_mat <- matrix(FALSE, nrow = n_side, ncol = n_side)
edge_mat[-1, -1] <- (core != prev_j) | (core != prev_i)

# Back to a plain logical vector indexed like the rows of grid.
edge <- as.vector(edge_mat)

# Plot the training points over the lattice labels and write the figure to
# cart12.eps. Uses train, grid, dflt and edge computed earlier in the script.
X <- cbind(train$x1, train$x2)
y <- (train$y == 1)

# psopts.r is a project-local script; presumably it sets PostScript device
# options -- confirm against that file.
source("psopts.r")
postscript(file = "cart12.eps")

# Empty frame first so the axis ranges come from the training data.
plot(X, type = "n", xlab = "FICO Score", ylab = "P-Index")

# drop = FALSE keeps each selection a two-column matrix even when exactly one
# row matches; otherwise it would collapse to a plain vector and points()
# would no longer read it as an (x, y) pair.
points(X[y, , drop = FALSE], pch = "o", col = "red")
points(X[!y, , drop = FALSE], pch = "o", col = "green")
points(grid[dflt, , drop = FALSE], pch = ".", col = "red")
points(grid[!dflt, , drop = FALSE], pch = ".", col = "green")
points(grid[edge, , drop = FALSE], pch = "+", col = "black", cex = 0.4)

dev.off()

# Score the hard-coded tree rule on the hold-out file and print the mean
# absolute difference between the 0/1 rule output and the recorded y.
# FALSE is spelled out because F is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
test <- read.table("eval.dat", header = FALSE,
                   col.names = c("x1", "x2", "y"))

tx1 <- test$x1
tx2 <- test$x2
ty <- test$y

# Same rule with thresholds rounded to four decimals (disabled):
#dflt <- (tx1 < 0.3441) |
#        ( (tx1 >= 0.3441) & (tx2 < 0.2216) ) |
#        ( (tx1 >= 0.3441) & (tx2 >= 0.2216) & (tx1<0.4202) & (tx2<0.5246));

# Note: this overwrites the dflt vector that was computed for the lattice.
dflt <- (tx1 < 0.3440913) |
  ((tx1 >= 0.3440913) & (tx2 < 0.2215745)) |
  ((tx1 >= 0.3440913) & (tx2 >= 0.2215745) &
     (tx1 < 0.4202267) & (tx2 < 0.524562))

# TRUE/FALSE -> 1/0 so it can be subtracted from ty.
dflt <- as.numeric(dflt)

# Assumes ty is coded 0/1, in which case the mean is the misclassification
# rate -- confirm against eval.dat.
err <- abs(ty - dflt)
print(mean(err))

