R : Copyright 2003, The R Development Core Team Version 1.6.2 (2003-01-10) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type `license()' or `licence()' for distribution details. R is a collaborative project with many contributors. Type `contributors()' for more information. Type `demo()' for some demos, `help()' for on-line help, or `help.start()' for a HTML browser interface to help. Type `q()' to quit R. > invisible(options(echo = TRUE)) > library(rpart) > > target.lrn <- read.table("../lrn/num/472.dat",header=T,colClasses="numeric") > target.val <- read.table("../val/num/472.dat",header=T,colClasses="numeric") > target.tst <- read.table("../tst/num/472.dat",header=T,colClasses="numeric") > > > y.lrn <- target.lrn[,1] > y.val <- target.val[,1] > y.tst <- target.tst[,1] > y <- c(y.lrn,y.val,y.tst) > > n.lrn <- length(y.lrn) > n.val <- length(y.val) > n.tst <- length(y.tst) > n <- length(y) > > rm(target.lrn,target.val,target.tst) > > wts <- mat.or.vec(n,1) ; for (i in 1:n.lrn) wts[i]=1 > idx.lrn <- 1:n.lrn > idx.val <- (n.lrn+1):(n.lrn+n.val) > idx.tst <- (n.lrn+n.val+1):n > > mod <- read.table("../cty_mod.txt", + header=F,colClasses="character",col.names=c("file","feature","type")) > > n.mod <- length(mod$file) > > first.time <- TRUE > > for (i in 1:n.mod) { + + fn.lrn <- paste("../lrn/",mod$type[i],"/",mod$file[i],".dat",sep="") + fn.val <- paste("../val/",mod$type[i],"/",mod$file[i],".dat",sep="") + fn.tst <- paste("../tst/",mod$type[i],"/",mod$file[i],".dat",sep="") + print(mod$feature[i]) + + if (mod$type[i]=="chr") { + + f.lrn <- read.table(fn.lrn, + header=T,colClasses="character",blank.lines.skip=F) + f.val <- read.table(fn.val, + header=T,colClasses="character",blank.lines.skip=F) + f.tst <- read.table(fn.tst, + header=T,colClasses="character",blank.lines.skip=F) + + f <- c(f.lrn[,1],f.val[,1],f.tst[,1]) + + if (mod$feature[i]=="STATE") { + f[f=="AS"|f=="DC"|f=="DE"|f=="MA"|f=="ME"|f=="NH"] <- "S1" + f[f=="OH"|f=="RI"|f=="VI"|f=="WV"] <- "S1" + f[f=="AA"|f=="AE"|f=="AP"|f=="CT"|f=="GU"|f=="MD"] <- "S2" + f[f=="NJ"|f=="NY"|f=="PA"|f=="PA"|f=="VA"|f=="VT"] <- "S2" + f[f=="WY"] <- "S2" + f[f=="AK"|f=="UT"|f=="MS"] <- "S3" + f[f=="NE"|f=="ND"] <- "S4" + f[f=="SD"|f=="SC"] <- "S5" + } + + f <- as.factor(f) + + n.lev <- nlevels(f) + f.name <- levels(f) + if(n.lev==2) f.name <- c(mod$feature[i],mod$feature[i]) + print(paste(" nlevels = ",n.lev)) + + f <- model.matrix(y ~ f - 1) # Note: Intercept removed. + f <- f[,2:n.lev] # Note: First dummy deleted. + f.name <- f.name[2:n.lev] # Note: First name deleted. + + } else { + + f.lrn<-read.table(fn.lrn, + header=T,colClasses="numeric",blank.lines.skip=F) + f.val<-read.table(fn.val, + header=T,colClasses="numeric",blank.lines.skip=F) + f.tst<-read.table(fn.tst, + header=T,colClasses="numeric",blank.lines.skip=F) + + f <- c(f.lrn[,1],f.val[,1],f.tst[,1]) + + f[is.na(f)] <- 0 + + f.name <- mod$feature[i] + + } + + if (first.time) { + X <- f + X.names <- f.name + first.time <- FALSE + } else { + X <- cbind(prev.X,f) + X.names <- c(prev.X.names,f.name) + } + + prev.X <- X + prev.X.names <- X.names + + } [1] "LASTGIFT" [1] "PEPSTRFL" [1] " nlevels = 2" [1] "STATE" [1] " nlevels = 33" [1] "RECP3" [1] " nlevels = 2" [1] "DOB" [1] "MAILCODE" [1] " nlevels = 2" [1] "MHUC2" [1] "LASTDATE" [1] "MINRAMNT" > > rm(prev.X,prev.X.names) > rm(f.lrn,f.val,f) > > dimnames(X) <- list(NULL,X.names) > > print(X.names) [1] "LASTGIFT" "PEPSTRFL" "AR" "AZ" "CA" "CO" [7] "FL" "GA" "HI" "IA" "ID" "IL" [13] "IN" "KS" "KY" "LA" "MI" "MN" [19] "MO" "MT" "NC" "NM" "NV" "OK" [25] "OR" "S1" "S2" "S3" "S4" "S5" [31] "TN" "TX" "WA" "WI" "RECP3" "DOB" [37] "MAILCODE" "MHUC2" "LASTDATE" "MINRAMNT" > > X.lrn <- X[idx.lrn,] > X.val <- X[idx.val,] > X.tst <- X[idx.tst,] > > > print(length(y.lrn[(X.lrn[,1]>=69) & (X.lrn[,2]>=0.5)])) [1] 173 > print( mean(y.lrn[(X.lrn[,1]>=69) & (X.lrn[,2]>=0.5)])) [1] 7.855491 > > print(length(y.val[(X.val[,1]>=69) & (X.val[,2]>=0.5)])) [1] 52 > print( mean(y.val[(X.val[,1]>=69) & (X.val[,2]>=0.5)])) [1] 4.326923 > > print(length(y.tst[(X.tst[,1]>=69) & (X.tst[,2]>=0.5)])) [1] 24 > print( mean(y.tst[(X.tst[,1]>=69) & (X.tst[,2]>=0.5)])) [1] 4.166667 > > print(length(y.lrn[(X.lrn[,1]>=69) & (X.lrn[,2]>=0.5) & (X.lrn[,39]>=9612)])) [1] 25 > print( mean(y.lrn[(X.lrn[,1]>=69) & (X.lrn[,2]>=0.5) & (X.lrn[,39]>=9612)])) [1] 19.08 > > print(length(y.val[(X.val[,1]>=69) & (X.val[,2]>=0.5) & (X.val[,39]>=9612)])) [1] 7 > print( mean(y.val[(X.val[,1]>=69) & (X.val[,2]>=0.5) & (X.val[,39]>=9612)])) [1] 0 > > print(length(y.tst[(X.tst[,1]>=69) & (X.tst[,2]>=0.5) & (X.tst[,39]>=9612)])) [1] 7 > print( mean(y.tst[(X.tst[,1]>=69) & (X.tst[,2]>=0.5) & (X.tst[,39]>=9612)])) [1] 0 > > print(length(y.lrn[(X.lrn[,1]< 20) & (X.lrn[,2]< 0.5)])) [1] 17886 > print( mean(y.lrn[(X.lrn[,1]< 20) & (X.lrn[,2]< 0.5)])) [1] 0.6388237 > > print(length(y.lrn[(X.lrn[,1]>=20) & (X.lrn[,2]< 0.5)])) [1] 17276 > print( mean(y.lrn[(X.lrn[,1]>=20) & (X.lrn[,2]< 0.5)])) [1] 0.8259962 > > print(length(y.lrn[(X.lrn[,1]< 20) & (X.lrn[,2]>=0.5)])) [1] 23971 > print( mean(y.lrn[(X.lrn[,1]< 20) & (X.lrn[,2]>=0.5)])) [1] 0.735792 > > print(length(y.lrn[(X.lrn[,1]>=20) & (X.lrn[,2]>=0.5)])) [1] 7767 > print( mean(y.lrn[(X.lrn[,1]>=20) & (X.lrn[,2]>=0.5)])) [1] 1.215012 > > proc.time() [1] 11.17 1.15 12.37 0.00 0.00 >