R : Copyright 2002, The R Development Core Team Version 1.5.1 (2002-06-17) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type `license()' or `licence()' for distribution details. R is a collaborative project with many contributors. Type `contributors()' for more information. Type `demo()' for some demos, `help()' for on-line help, or `help.start()' for a HTML browser interface to help. Type `q()' to quit R. > invisible(options(echo = TRUE)) > target.lrn <- read.table("../lrn/num/472.dat",header=T,colClasses="numeric") > target.val <- read.table("../val/num/472.dat",header=T,colClasses="numeric") > > y.lrn <- target.lrn[,1] > y.val <- target.val[,1] > y <- c(y.lrn,y.val) > > n.lrn <- length(y.lrn) > n.val <- length(y.val) > n <- length(y) > > rm(target.lrn,target.val) > > wts <- mat.or.vec(n,1) ; for (i in 1:n.lrn) wts[i]=1 > > mod <- read.table("cty_mod.txt", + header=F,colClasses="character",col.names=c("file","feature","type")) > > n.mod <- length(mod$file) > > first.time <- TRUE > > for (i in 1:n.mod) { + + fn.lrn <- paste("../lrn/",mod$type[i],"/",mod$file[i],".dat",sep="") + fn.val <- paste("../val/",mod$type[i],"/",mod$file[i],".dat",sep="") + print(mod$feature[i]) + + if (mod$type[i]=="chr") { + + f.lrn <- read.table(fn.lrn, + header=T,colClasses="character",blank.lines.skip=F) + f.val <- read.table(fn.val, + header=T,colClasses="character",blank.lines.skip=F) + + f <- c(f.lrn[,1],f.val[,1]) + + if (mod$feature[i]=="STATE") { + f[f=="AS"|f=="DC"|f=="DE"|f=="MA"|f=="ME"|f=="NH"] <- "S1" + f[f=="OH"|f=="RI"|f=="VI"|f=="WV"] <- "S1" + f[f=="AA"|f=="AE"|f=="AP"|f=="CT"|f=="GU"|f=="MD"] <- "S2" + f[f=="NJ"|f=="NY"|f=="PA"|f=="PA"|f=="VA"|f=="VT"] <- "S2" + f[f=="WY"] <- "S2" + f[f=="AK"|f=="UT"|f=="MS"] <- "S3" + f[f=="NE"|f=="ND"] <- "S4" + f[f=="SD"|f=="SC"] <- "S5" + } + + f <- as.factor(f) + + n.lev <- nlevels(f) + print(paste(" nlevels = ",n.lev)) + + f <- model.matrix(y ~ f - 1) # Note: Intercept removed. + f <- f[,2:ncol(f)] # Note: First dummy deleted. + + } else { + + f.lrn<-read.table(fn.lrn, + header=T,colClasses="numeric",blank.lines.skip=F) + f.val<-read.table(fn.val, + header=T,colClasses="numeric",blank.lines.skip=F) + + f <- c(f.lrn[,1],f.val[,1]) + + f[is.na(f)] <- 0 + + if (mod$feature[i]=="DOB") { + d <- f ; d[d>0] <- 1 # Note: Dummy for missing DOB + f <- cbind(d,f,f^2) # Note: Quadratic term added to DOB. + rm(d) + } + } + + if (first.time) { + X <- f + first.time <- FALSE + } else { + X <- cbind(prev.X,f) + } + + prev.X <- X + + rm(f.lrn,f.val,f) + + fit <- lm(y~X,weight=wts) # Note: Validation sample not in fit. + + ehat.lrn <- fit$residuals[1:n.lrn] + ehat.val <- fit$residuals[(n.lrn+1):n] + ehat <- fit$residuals + + sse.lrn <- sum(ehat.lrn^2) + sse.val <- sum(ehat.val^2) + sse <- sum(ehat^2) + + mse.lrn <- sse.lrn/(n.lrn - fit$rank) + mse.val <- sse.val/n.val + mse <- mse.lrn*(fit$rank/n.lrn) + mse.val + + rm(ehat.lrn,ehat.val,ehat) + aov <- anova.lm(fit) + + print(paste(" rank = ",fit$rank)) + print(paste(" P-value = ",aov[1,5])) + print(paste(" mse.lrn = ",mse.lrn)) + print(paste(" mse.val = ",mse.val)) + print(paste(" mse = ",mse)) + } [1] "LASTGIFT" [1] " rank = 2" [1] " P-value = 2.67701080686626e-54" [1] " mse.lrn = 20.0276103235768" [1] " mse.val = 18.7227564450238" [1] " mse = 18.7233551777689" [1] "PEPSTRFL" [1] " nlevels = 2" [1] " rank = 3" [1] " P-value = 9.0713885592176e-62" [1] " mse.lrn = 20.0158344040896" [1] " mse.val = 18.7081479932208" [1] " mse = 18.7090455642703" [1] "HVP3" [1] " rank = 4" [1] " P-value = 4.52203910507778e-78" [1] " mse.lrn = 19.9920551714772" [1] " mse.val = 18.6990223300463" [1] " mse = 18.7002176696679" [1] "RECP3" [1] " nlevels = 2" [1] " rank = 5" [1] " P-value = 4.84731881981689e-83" [1] " mse.lrn = 19.9839879541175" [1] " mse.val = 18.6931781708007" [1] " mse = 18.6946717423967" [1] "DOB" [1] " rank = 8" [1] " P-value = 4.49574549005822e-84" [1] " mse.lrn = 19.9793807866712" [1] " mse.val = 18.6881846419117" [1] " mse = 18.6905738055334" [1] "MAILCODE" [1] " nlevels = 2" [1] " rank = 9" [1] " P-value = 5.93152737490832e-85" [1] " mse.lrn = 19.9771958512967" [1] " mse.val = 18.6846932677738" [1] " mse = 18.6873807829108" [1] "LASTDATE" [1] " rank = 10" [1] " P-value = 1.35828488195307e-90" [1] " mse.lrn = 19.9683912432329" [1] " mse.val = 18.6838755366517" [1] " mse = 18.6868603484968" [1] "HV3" [1] " rank = 11" [1] " P-value = 5.85757710769845e-90" [1] " mse.lrn = 19.9683721093116" [1] " mse.val = 18.6834290791433" [1] " mse = 18.6867123690267" > > print(summary.lm(fit)) Call: lm(formula = y ~ X, weights = wts) Residuals: Min 1Q Median 3Q Max -20.4184 -0.8575 -0.6260 0.0000 199.0753 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -1.862e+01 3.397e+00 -5.480 4.26e-08 *** Xprev.X 1.969e-02 1.226e-03 16.064 < 2e-16 *** Xf 2.253e-01 3.579e-02 6.295 3.10e-10 *** Xf 3.634e-03 7.088e-04 5.127 2.95e-07 *** Xf 5.702e-01 1.225e-01 4.653 3.28e-06 *** Xd -2.509e-01 9.607e-02 -2.611 0.00903 ** Xf 2.025e-04 5.199e-05 3.894 9.88e-05 *** X -2.734e-08 6.786e-09 -4.029 5.62e-05 *** Xf -4.189e-01 1.452e-01 -2.885 0.00391 ** Xf 1.963e-03 3.557e-04 5.518 3.44e-08 *** Xf 1.162e-02 1.126e-02 1.032 0.30229 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 4.469 on 66889 degrees of freedom Multiple R-Squared: 0.006673, Adjusted R-squared: 0.006525 F-statistic: 44.94 on 10 and 66889 DF, p-value: < 2.2e-16 > > x0 <- c(0,n.lrn) > y0 <- c(0,sum(y.lrn-0.68)) > > x1 <- (1:n.lrn) > y1 <- fit$fitted.values[1:n.lrn] > y1 <- y1-0.68 > y1 <- sort(y1) > y1 <- y1[n.lrn:1] > y1 <- cumsum(y1) > > idx <- 1:n.lrn > > print(paste("maximum profit in learning sample is ", max(y1))) [1] "maximum profit in learning sample is 11554.2620355405" > print(paste("maximum occurs at ", idx[y1==max(y1)])) [1] "maximum occurs at 39813" > > idx <- seq(1,n.lrn,length=200) > > x1 <- x1[idx] > y1 <- y1[idx] > > source("psopts.r"); > postscript(file="cty_mod.eps"); > > plot(x=c(x0,x1),y=c(y0,y1),ylab="dollars",xlab="size of mailing",type="n") > lines(x=x0,y=y0,col="green") > lines(x=x1,y=y1,col="red") > > dev.off() null device 1 > proc.time() [1] 93.00 8.54 101.55 0.00 0.00 >