##  MaxCorr.Quantization.R ("Maximize correlation thru merging the neighboring bins after quantization")
## Shouyong Peng
##
## This R script contains a collection of functions that quantize data and merge neighboring bins.

#dyn.load("src/MaxCorr.Quantization.so")

########################################################################
merge_quant <- function(q.n0,nbin= NULL) {
  if (is.null(nbin)) {
    nbin <-
      max(q.n0)
  }
  
  if (nbin < 3) {
    print("not enough bin number to merge")
    print(nbin)
    return(list(q = q.n0, nbin =  nbin))
  }
  
  if (ncol(q.n0) !=2) {
    print("should be 2 columns")
    return()
  }

  q1_o = as.integer(q.n0[,1])
  q2_o = as.integer(q.n0[,2])
  nbin_o = as.integer(nbin)
  n = nrow(q.n0)

  q1_n = integer(n)
  q2_n = integer(n)
  nbin_n = as.integer(nbin)

  system.time(
  junk <- .C("merge_quant",
             q1_o = as.integer(q.n0[,1]),
             q2_o = as.integer(q.n0[,2]),
             nbin_o = nbin_o,
             n = nrow(q.n0),
             q1_n = q1_n,
             q2_n = q2_n,
             nbin_n = nbin_n)
              )
  str(junk)
  q_n <- cbind(junk$q1_n, junk$q2_n); colnames(q_n) = colnames(q.n0);
  nbin <- junk$nbin_n

  res <- list(q = q_n, nbin =  nbin)
  res
}
########################################################################

get_max_corr <- function(data=NULL, nbin=NULL)  {
  if (!is.null(nbin)) {
    bl <- list()
    for (i in 1:2)
      bl[[i]] <- quantile(data[,i], 0:nbin/nbin)
  }
  
  nbin <-
    as.integer( length(bl[[1]]) - 1)
  q.n0 <- data
  for (i in 1:2)       q.n0[,i] <- quantize(x=data[,i], breaks=bl[[i]])
  
  c0 <- abs(cor(q.n0) [1,2])   # initial corre

  q1_o = as.integer(q.n0[,1])
  q2_o = as.integer(q.n0[,2])
  nbin_o = nbin
  n = nrow(q.n0)

  q1_n = q1_o # integer(n)
  q2_n = integer(n)
  nbin_n = nbin
  
  junk = .C("get_max_corr",
    q1_o = as.integer(q.n0[,1]),
    q2_o = as.integer(q.n0[,2]),
    nbin_o = nbin_o,
    n = nrow(q.n0),
    q1_n = q1_n,
    q2_n = q2_n,
    nbin_n = nbin_n)

  q.n0[,1]  <- junk$q1_n; q.n0[,2] <-  junk$q2_n; 
  nbin <- junk$nbin_n
    
  qcut <- round(c(0, cumsum(table(q.n0[,1])/nrow(q.n0))),3 )
  
  for (i in 1:2)
    bl[[i]] <- quantile(data[,i], qcut)
  
  max.corr <- abs(cor(q.n0)[1,2])
  
  res <- list(bl=bl,max.corr=max.corr,q=q.n0)
  res
}
