
R : Copyright 2003, The R Development Core Team
Version 1.6.2  (2003-01-10)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type `license()' or `licence()' for distribution details.

R is a collaborative project with many contributors.
Type `contributors()' for more information.

Type `demo()' for some demos, `help()' for on-line help, or
`help.start()' for a HTML browser interface to help.
Type `q()' to quit R.

> invisible(options(echo = TRUE))
> target.lrn <- read.table("../lrn/num/472.dat",header=T,colClasses="numeric")
> target.val <- read.table("../val/num/472.dat",header=T,colClasses="numeric")
> target.tst <- read.table("../tst/num/472.dat",header=T,colClasses="numeric")
> 
>  
> y.lrn <- target.lrn[,1]
> y.val <- target.val[,1]
> y.tst <- target.tst[,1]
> y     <- c(y.lrn,y.val,y.tst)
> 
> n.lrn <- length(y.lrn)
> n.val <- length(y.val)
> n.tst <- length(y.tst)
> n     <- length(y)
> 
> rm(target.lrn,target.val,target.tst)
> 
> wts <- mat.or.vec(n,1) ; for (i in 1:n.lrn) wts[i]=1
> idx.lrn <- 1:n.lrn
> idx.val <- (n.lrn+1):(n.lrn+n.val)
> idx.tst <- (n.lrn+n.val+1):n
> 
> mod <- read.table("../cty_mod.txt",
+          header=F,colClasses="character",col.names=c("file","feature","type"))
> 
> n.mod <- length(mod$file)
> 
> first.time <- TRUE
> 
> for (i in 1:n.mod) {
+ 
+   fn.lrn <- paste("../lrn/",mod$type[i],"/",mod$file[i],".dat",sep="")
+   fn.val <- paste("../val/",mod$type[i],"/",mod$file[i],".dat",sep="")
+   fn.tst <- paste("../tst/",mod$type[i],"/",mod$file[i],".dat",sep="")
+   print(mod$feature[i])
+ 
+   if (mod$type[i]=="chr") {
+ 
+     f.lrn <- read.table(fn.lrn,
+                header=T,colClasses="character",blank.lines.skip=F)
+     f.val <- read.table(fn.val,
+                header=T,colClasses="character",blank.lines.skip=F)
+     f.tst <- read.table(fn.tst,
+                header=T,colClasses="character",blank.lines.skip=F)
+ 
+     f <- c(f.lrn[,1],f.val[,1],f.tst[,1])
+ 
+     if (mod$feature[i]=="STATE") {
+       f[f=="AS"|f=="DC"|f=="DE"|f=="MA"|f=="ME"|f=="NH"] <- "S1"
+       f[f=="OH"|f=="RI"|f=="VI"|f=="WV"]                 <- "S1"
+       f[f=="AA"|f=="AE"|f=="AP"|f=="CT"|f=="GU"|f=="MD"] <- "S2"
+       f[f=="NJ"|f=="NY"|f=="PA"|f=="PA"|f=="VA"|f=="VT"] <- "S2"
+       f[f=="WY"]                                         <- "S2"  
+       f[f=="AK"|f=="UT"|f=="MS"]                         <- "S3"
+       f[f=="NE"|f=="ND"]                                 <- "S4"
+       f[f=="SD"|f=="SC"]                                 <- "S5"
+     } 
+     
+     f <- as.factor(f)
+ 
+     n.lev <- nlevels(f)
+     print(paste("  nlevels = ",n.lev))
+ 
+     f <- model.matrix(y ~ f - 1)   # Note: Intercept removed.
+     f <- f[,2:ncol(f)]             # Note: First dummy deleted.
+ 
+   } else {
+     
+     f.lrn<-read.table(fn.lrn,
+              header=T,colClasses="numeric",blank.lines.skip=F)
+     f.val<-read.table(fn.val,
+              header=T,colClasses="numeric",blank.lines.skip=F)
+     f.tst<-read.table(fn.tst,
+              header=T,colClasses="numeric",blank.lines.skip=F)
+ 
+     f <- c(f.lrn[,1],f.val[,1],f.tst[,1])
+ 
+     f[is.na(f)] <- 0
+ 
+     if (mod$feature[i]=="DOB") { 
+       d <- f ; d[d>0] <- 1         # Note: Dummy for missing DOB
+       f <- cbind(d,f,f^2)          # Note: Quadratic term added to DOB.
+       rm(d)
+     }
+   }
+ 
+   if (first.time) {
+     X <- f
+     first.time <- FALSE
+   } else {
+     X <- cbind(prev.X,f)
+   }
+ 
+   prev.X <- X
+ 
+   rm(f.lrn,f.val,f)
+ 
+   fit <- lm(y~X,weights=wts)       # Note: Validation sample not in fit.
+ 
+   ehat     <- fit$residuals
+   mse.lrn <-  sum(ehat[idx.lrn]^2)/n.lrn 
+   mse.val <-  sum(ehat[idx.val]^2)/n.val
+   mse.tst <-  sum(ehat[idx.tst]^2)/n.tst
+ 
+   aov <- anova.lm(fit)
+ 
+   print(paste("  rank    = ",fit$rank))
+   print(paste("  P-value = ",aov[1,5]))
+   print(paste("  mse.lrn = ",mse.lrn))
+   print(paste("  mse.val = ",mse.val))
+   print(paste("  mse.tst = ",mse.tst))
+ }
[1] "LASTGIFT"
[1] "  rank    =  2"
[1] "  P-value =  2.67701080673601e-54"
[1] "  mse.lrn =  20.0270115908238"
[1] "  mse.val =  18.7227564450251"
[1] "  mse.tst =  17.8272326970325"
[1] "PEPSTRFL"
[1] "  nlevels =  2"
[1] "  rank    =  3"
[1] "  P-value =  9.07138855945454e-62"
[1] "  mse.lrn =  20.0149368330439"
[1] "  mse.val =  18.7081479932204"
[1] "  mse.tst =  17.8196781443162"
[1] "STATE"
[1] "  nlevels =  33"
[1] "  rank    =  35"
[1] "  P-value =  6.52910427059322e-57"
[1] "  mse.lrn =  19.9900904813962"
[1] "  mse.val =  18.6964249974491"
[1] "  mse.tst =  17.8000428318328"
[1] "RECP3"
[1] "  nlevels =  2"
[1] "  rank    =  36"
[1] "  P-value =  3.33006597654248e-62"
[1] "  mse.lrn =  19.9813328468216"
[1] "  mse.val =  18.6905035298918"
[1] "  mse.tst =  17.7935606373800"
[1] "DOB"
[1] "  rank    =  39"
[1] "  P-value =  3.25009513851553e-64"
[1] "  mse.lrn =  19.9759223652743"
[1] "  mse.val =  18.6854541249365"
[1] "  mse.tst =  17.7877206763215"
[1] "MAILCODE"
[1] "  nlevels =  2"
[1] "  rank    =  40"
[1] "  P-value =  1.97860943842719e-65"
[1] "  mse.lrn =  19.9733005887601"
[1] "  mse.val =  18.6817963870699"
[1] "  mse.tst =  17.8008005003373"
[1] "MHUC2"
[1] "  rank    =  41"
[1] "  P-value =  2.36565741555032e-66"
[1] "  mse.lrn =  19.9711214855491"
[1] "  mse.val =  18.6783890477811"
[1] "  mse.tst =  17.8113665226786"
[1] "LASTDATE"
[1] "  rank    =  42"
[1] "  P-value =  5.51702610528602e-72"
[1] "  mse.lrn =  19.9618521296342"
[1] "  mse.val =  18.6772270733321"
[1] "  mse.tst =  17.8050817002539"
[1] "MINRAMNT"
[1] "  rank    =  43"
[1] "  P-value =  3.92098711525271e-72"
[1] "  mse.lrn =  19.9608334474451"
[1] "  mse.val =  18.6770872778876"
[1] "  mse.tst =  17.8000310256753"
> 
> print(summary.lm(fit))

Call:
lm(formula = y ~ X, weights = wts)

Residuals:
     Min       1Q   Median       3Q      Max 
-21.9797  -0.8335  -0.5786   0.0000 199.1823 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.911e+01  3.402e+00  -5.616 1.96e-08 ***
Xprev.X      2.126e-02  1.415e-03  15.026  < 2e-16 ***
Xf           2.010e-01  3.749e-02   5.361 8.30e-08 ***
XfAR         6.236e-02  2.127e-01   0.293 0.769425    
XfAZ         1.399e-01  1.700e-01   0.823 0.410770    
XfCA         4.137e-01  1.366e-01   3.028 0.002467 ** 
XfCO         2.686e-01  1.772e-01   1.516 0.129520    
XfFL         1.584e-01  1.430e-01   1.107 0.268117    
XfGA         1.496e-01  1.588e-01   0.942 0.346416    
XfHI         6.331e-01  2.819e-01   2.246 0.024709 *  
XfIA        -2.583e-02  1.969e-01  -0.131 0.895609    
XfID         5.674e-01  2.632e-01   2.156 0.031089 *  
XfIL         1.049e-01  1.471e-01   0.713 0.476022    
XfIN         1.351e-02  1.629e-01   0.083 0.933913    
XfKS         7.852e-02  1.980e-01   0.397 0.691704    
XfKY         7.040e-02  1.864e-01   0.378 0.705622    
XfLA         4.164e-02  1.865e-01   0.223 0.823344    
XfMI         1.088e-01  1.492e-01   0.729 0.465977    
XfMN        -7.062e-02  1.730e-01  -0.408 0.683147    
XfMO         1.375e-01  1.655e-01   0.831 0.405837    
XfMT         1.700e-01  2.681e-01   0.634 0.526118    
XfNC         1.928e-01  1.544e-01   1.249 0.211818    
XfNM         2.802e-01  2.231e-01   1.256 0.209036    
XfNV         7.349e-02  2.180e-01   0.337 0.735998    
XfOK        -7.238e-02  1.855e-01  -0.390 0.696369    
XfOR         4.053e-01  1.724e-01   2.350 0.018754 *  
XfS1        -5.482e-01  4.914e-01  -1.116 0.264629    
XfS2         3.605e-01  2.438e-01   1.479 0.139193    
XfS3        -1.031e-01  1.810e-01  -0.569 0.569026    
XfS4         1.207e-01  2.128e-01   0.568 0.570346    
XfS5         2.643e-01  1.749e-01   1.512 0.130650    
XfTN         1.720e-02  1.683e-01   0.102 0.918618    
XfTX         1.236e-01  1.446e-01   0.855 0.392714    
XfWA         2.685e-01  1.583e-01   1.696 0.089902 .  
XfWI        -6.061e-02  1.654e-01  -0.366 0.714104    
Xf           5.765e-01  1.226e-01   4.702 2.58e-06 ***
Xd          -2.427e-01  9.665e-02  -2.511 0.012035 *  
Xf           1.969e-04  5.209e-05   3.780 0.000157 ***
X           -2.615e-08  6.795e-09  -3.849 0.000119 ***
Xf          -4.274e-01  1.452e-01  -2.942 0.003259 ** 
Xf           5.505e-02  2.060e-02   2.673 0.007525 ** 
Xf           2.002e-03  3.560e-04   5.624 1.88e-08 ***
Xf          -4.458e-03  2.414e-03  -1.847 0.064729 .  
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 

Residual standard error: 4.469 on 66857 degrees of freedom
Multiple R-Squared: 0.006885,	Adjusted R-squared: 0.006261 
F-statistic: 11.04 on 42 and 66857 DF,  p-value: < 2.2e-16 

> 
> outvec <- fit$fitted.values[idx.lrn]
> outlab <- "yhat.lrn"
> outfil <- "yhat_lrn.dat"
> write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)
> 
> outvec <- fit$fitted.values[idx.val]
> outlab <- "yhat.val"
> outfil <- "yhat_val.dat"
> write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)
> 
> outvec <- fit$fitted.values[idx.tst]
> outlab <- "yhat.tst"
> outfil <- "yhat_tst.dat"
> write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)
> 
> rm(outvec,outlab,outfil)
> 
> source("psopts.r");
> 
> idx <- seq(1,n.lrn,length=500)
> 
> y0.lrn <- c(0,sum(y.lrn-0.68))
> x0.lrn <- c(0,n.lrn)
> 
> idx.yhat <- sort.list(fit$fitted.values[idx.lrn],decreasing=TRUE)
> y1.lrn <- y.lrn[idx.yhat]-0.68 
> y1.lrn <- cumsum(y1.lrn)
> y1.lrn <- y1.lrn[idx]
> x1.lrn <- 1:n.lrn
> x1.lrn <- x1.lrn[idx]
> 
> postscript(file="cty_lif_lrn.eps")
> 
> plot(x=c(x0.lrn,x1.lrn),y=c(y0.lrn,y1.lrn),
+   ylab="dollars",xlab="size of mailing",type="n")
> lines(x=x0.lrn,y=y0.lrn,col="green")
> lines(x=x1.lrn,y=y1.lrn,col="red")
> 
> dev.off()
null device 
          1 
> 
> idx <- seq(1,n.val,length=500)
> 
> y0.val <- c(0,sum(y.val-0.68))
> x0.val <- c(0,n.val)
> 
> idx.yhat <- sort.list(fit$fitted.values[idx.val],decreasing=TRUE)
> y1.val <- y.val[idx.yhat]-0.68 
> y1.val <- cumsum(y1.val)
> y1.val <- y1.val[idx]
> x1.val <- 1:n.val
> x1.val <- x1.val[idx]
> 
> postscript(file="cty_lif_val.eps")
> 
> plot(x=c(x0.val,x1.val),y=c(y0.val,y1.val),
+   ylab="dollars",xlab="size of mailing",type="n")
> lines(x=x0.val,y=y0.val,col="green")
> lines(x=x1.val,y=y1.val,col="blue")
> 
> dev.off()
null device 
          1 
> 
> y0.lrn <- (y0.lrn/y0.lrn[2])*100
> x0.lrn <- (x0.lrn/x0.lrn[2])*100
> 
> y1.lrn <- (y1.lrn/y1.lrn[500])*100
> x1.lrn <- (x1.lrn/x1.lrn[500])*100
> 
> y1.val <- (y1.val/y1.val[500])*100
> x1.val <- (x1.val/x1.val[500])*100
> 
> postscript(file="cty_lif_both.eps")
> 
> plot(x=c(x0.lrn,x1.lrn,x1.val),y=c(y0.lrn,y1.lrn,y1.val),
+   ylab="percent",xlab="percent",type="n")
> lines(x=x0.lrn,y=y0.lrn,col="green")
> lines(x=x1.lrn,y=y1.lrn,col="red")
> lines(x=x1.val,y=y1.val,col="blue")
> 
> dev.off()
null device 
          1 
> 
> outvec <-  x1.val
> outlab <- "x.net.val"
> outfil <- "lif_x_net_val.dat"
> write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)
> 
> outvec <-  y1.val
> outlab <- "y.net.val"
> outfil <- "lif_y_net_val.dat"
> write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)
> 
> idx <- seq(1,n.lrn,length=500)
> 
> y0.lrn <- c(0,sum(y.lrn))
> x0.lrn <- c(0,n.lrn)
> 
> idx.yhat <- sort.list(fit$fitted.values[idx.lrn],decreasing=TRUE)
> y1.lrn <- y.lrn[idx.yhat] 
> y1.lrn <- cumsum(y1.lrn)
> y1.lrn <- y1.lrn[idx]
> x1.lrn <- 1:n.lrn
> x1.lrn <- x1.lrn[idx]
> 
> idx <- seq(1,n.val,length=500)
> 
> y0.val <- c(0,sum(y.val))
> x0.val <- c(0,n.val)
> 
> idx.yhat <- sort.list(fit$fitted.values[idx.val],decreasing=TRUE)
> y1.val <- y.val[idx.yhat] 
> y1.val <- cumsum(y1.val)
> y1.val <- y1.val[idx]
> x1.val <- 1:n.val
> x1.val <- x1.val[idx]
> 
> y0.lrn <- (y0.lrn/y0.lrn[2])*100
> x0.lrn <- (x0.lrn/x0.lrn[2])*100
> 
> y1.lrn <- (y1.lrn/y1.lrn[500])*100
> x1.lrn <- (x1.lrn/x1.lrn[500])*100
> 
> y1.val <- (y1.val/y1.val[500])*100
> x1.val <- (x1.val/x1.val[500])*100
> 
> postscript(file="cty_lif_conv.eps")
> 
> plot(x=c(x0.lrn,x1.lrn,x1.val),y=c(y0.lrn,y1.lrn,y1.val),
+   ylab="percent",xlab="percent",type="n")
> lines(x=x0.lrn,y=y0.lrn,col="green")
> lines(x=x1.lrn,y=y1.lrn,col="red")
> lines(x=x1.val,y=y1.val,col="blue")
> 
> dev.off()
null device 
          1 
> 
> outvec <-  x1.val
> outlab <- "x.gross.val"
> outfil <- "lif_x_gross_val.dat"
> write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)
> 
> outvec <-  y1.val
> outlab <- "y.gross.val"
> outfil <- "lif_y_gross_val.dat"
> write(c(outlab,formatC(outvec,digits=17,width=26,format="e")),file=outfil)
> 
> 
> 
> 
> 
> proc.time()
[1] 52.30 16.88 69.23  0.00  0.00
> 
