#include "libscl.h" 
using namespace scl; 
using namespace std;

int main(int argc, char** argp, char** envp)
{
  vector<string> string_names;
  vector< vector<string> > string_data;
  vector<string> numeric_names;
  realmat numeric_data;
  vector<string> unknown_names;

  csvread("epa.csv", true, string_names, string_data,
    numeric_names, numeric_data, unknown_names);

  cout << starbox("/Names of the columns of numeric_data//");
  for (vector<string>::size_type i = 0; i != numeric_names.size(); ++i) {
    cout << "\t Col" << fmt('d',4,i+1) << "    " << numeric_names[i] << '\n';
  }

  cout << starbox("/First five rows of numeric_data//");
  cout << numeric_data("1:5","");

  cout << starbox("/Last five rows of numeric_data//");
  cout << numeric_data(seq(numeric_data.nrow()-4,numeric_data.nrow()),"");

  realmat WE = numeric_data("","4,9");    // WeightInTons, EngineInLiters

  intvec X2_cols("5:8,10:14");            // all the other regressors
  realmat X2 = numeric_data("",X2_cols);  // except the intercept term

  realmat y = numeric_data("",2);         // GallonsPerMile

  poly quadratic('r',WE(1,""),2,2);

  realmat X1(X2.nrow(),quadratic.get_len());

  realmat basis;
  for (INTEGER i=1; i<=WE.nrow(); ++i) {
    quadratic.set_x(WE(i,""));
    quadratic.get_basis(basis);
    for (INTEGER j=1; j<=X1.ncol(); ++j) X1(i,j) = basis[j];
  }

  INTEGER rank;
  realmat X0 = cbind(X1,X2);          // The intercept term is the
  realmat C = invpsd(T(X0)*X0,rank);  // monomial with multi-index (0,0)
  realmat b_hat = C*(T(X0)*y);
  realmat residual = y - X0*b_hat;
  realmat SSE = T(residual)*residual;
  REAL s_squared = SSE[1]/(X0.nrow() - rank);
  realmat V = s_squared*C;

  cout << starbox("/First five rows of X0//");
  cout << X0("1:5","");

  cout << starbox("/Last five rows of X0//");
  cout << X0(seq(X0.nrow()-4,X0.nrow()),"");

  INTEGER namelen = numeric_names[0].size();
  string pad(5,' ');

  cout << starbox("/Regression of y on X0//");
  cout << pad;
  cout << "Regressor";
  for (INTEGER i=9; i<namelen; ++i) cout << ' '; 
  cout << "           Coeffient     Standard Errror   t-Statistic" << '\n';
  vector<string> multi;
  quadratic.get_multi(multi,',');
  for (vector<string>::size_type i = 0; i < multi.size(); ++i) {
    cout << pad;
    string monomial = string("(W,E)^") + "(" + multi[i] + ")";
    cout << monomial;
    for (INTEGER j=monomial.size(); j<namelen; ++j) cout << ' ';
    INTEGER k = i + 1;
    cout << fmt('f',20,8,b_hat[k]) << fmt('f',20,8,sqrt(V(k,k))) 
         << fmt('f',14,3,b_hat[k]/sqrt(V(k,k)));
    cout << '\n';
  } 
  for (INTEGER i = 1; i <= X2_cols.size(); ++i) {
    cout << pad;
    cout << numeric_names[X2_cols[i] - 1];    // numeric_names indexes from 0
    INTEGER k = i + X1.ncol();
    cout << fmt('f',20,8,b_hat[k]) << fmt('f',20,8,sqrt(V(k,k)))
         << fmt('f',14,3,b_hat[k]/sqrt(V(k,k)));
    cout << '\n';
  }
  
  return 0;
}
