#!/usr/bin/R

# Load legacy Gaussian Process code:
source("GPPois_legacy.R")

# Read the scattering measurements (tab-separated, with a header row).  The
# file carries both the high-noise and the low-noise version of the data.
tio2.data <- read.table(file = "TiO2_data.txt", header = TRUE, sep = "\t")

# Number of data points (rows) in the measurement table.
N <- nrow(tio2.data)

# Padding, in data points, added on each side of every peak region so that the
# region also covers part of the flat neighbourhood.  This gives the mask
# function room to fall off close to zero.  The exact value is not expected to
# matter much: it must be large enough for the mask to reach zero, yet small
# enough to avoid undue computational burden.  80 points seems good.
maxFlatPoints <- 80

# Load manual (eyeballed) estimates of the boundaries for the
# peak-containing regions (tab-separated, with a header row).
region.estimates <- read.table("manual_boundary_estimates.txt",
  sep = "\t", header = TRUE)

# Compute the indices into the Q-range for each peak-containing region.  For
# every row of the estimates, take the span of the indices that
# indicesInRange() reports for [XL.min, XR.max], widen it by maxFlatPoints on
# each side, and clip the result to 1..N.
#
# seq_len() is used instead of 1:nrow(...) so that an empty estimates table
# yields an empty list rather than running the body for the bogus indices 1
# and 0.
regions <- lapply(seq_len(nrow(region.estimates)), function(i) {
  hits <- indicesInRange(v = tio2.data$Q,
    Min = region.estimates[i, "XL.min"], Max = region.estimates[i, "XR.max"])
  # Without this check an empty result would only surface later as a range()
  # warning followed by a cryptic failure when building bound.L:bound.R.
  if (length(hits) == 0) {
    stop("No Q values found inside peak region ", i,
      " of manual_boundary_estimates.txt", call. = FALSE)
  }
  index.range <- range(hits)
  bound.L <- max(1, index.range[1] - maxFlatPoints)
  bound.R <- min(N, index.range[2] + maxFlatPoints)
  bound.L:bound.R
})

# Optimize the hyperparameters with manualSpikyOnFlat(), using the Q values
# and the high-noise counts, and passing the manual region estimates as
# ellDipHints.
mSOF <- manualSpikyOnFlat(
  X = tio2.data$Q,
  Yp = tio2.data$hi.noise,
  ellDipHints = region.estimates
)
gc()  # Explicitly trigger garbage collection once the optimization is done

# Construct the covariance matrix.  Start with the background term: the SE
# Gram matrix over all Q values, built from the background hyperparameters
# (bgr.ell, bgr.sigma.f.sq) returned by the optimization.
hypers.bgr.SE <- hypersForSE(ell = mSOF$bgr.ell,
  sigmaFSq = mSOF$bgr.sigma.f.sq)
K <- GramMatrixSE(X = tio2.data$Q, hypers = hypers.bgr.SE)

# Row i of mSOF$peak.regions is paired with regions[[i]] below, so there must
# be an index set for every fitted peak region.  Fail early with a clear
# message instead of a bare "subscript out of bounds".
n.peak.regions <- nrow(mSOF$peak.regions)
if (n.peak.regions > length(regions)) {
  stop("mSOF$peak.regions has ", n.peak.regions, " rows but only ",
    length(regions), " index regions were computed", call. = FALSE)
}

# Add contribution for each sub-region.  seq_len() (rather than 1:n) makes the
# loop a no-op when there are no peak regions.
for (i in seq_len(n.peak.regions)) {
  idx <- regions[[i]]
  Q.i <- tio2.data$Q[idx]
  p.i <- mSOF$peak.regions[i, ]
  ell.i <- p.i["ell"]
  # Local SE Gram matrix restricted to this region's Q values.
  local.SE.hypers <- hypersForSE(ell = ell.i, sigmaFSq = p.i["sigmaFSq"])
  K.local <- GramMatrixSE(X = Q.i, hypers = local.SE.hypers)
  # Offsets from the region's left (xL) and right (xR) edge, in units of ell.
  X.xL.scaled <- (Q.i - p.i["xL"]) / ell.i
  X.xR.scaled <- (Q.i - p.i["xR"]) / ell.i
  # Mask values: half the difference of s() at the two scaled offsets.
  # NOTE(review): s() comes from the legacy code; presumably a sigmoid-like
  # step, so the mask is near 1 inside [xL, xR] and near 0 outside -- confirm.
  mask.vals <- 0.5 * (s(X.xL.scaled) - s(X.xR.scaled))
  # Weight entry (j, k) of the local term by mask[j] * mask[k] (element-wise
  # product), then add it into the matching sub-block of K.
  K.mask <- outer(mask.vals, mask.vals)
  K.local <- K.local * K.mask
  K[idx, idx] <- K[idx, idx] + K.local
}
gc()

# Posterior mean and pointwise 1-sigma band of the signal, written out as a
# tab-separated table.  Key quantities:
#   K:    total covariance matrix for the signal contribution
#   M:    matrix that maps the noisy (transformed) data points to the
#         estimate of the true signal
#   Var:  diagonal of the covariance matrix describing the uncertainty in the
#         true signal

# Constant added to the diagonal of K.
# NOTE(review): presumably the (approximately constant) variance of the
# sqrt(x + 3/8)-transformed Poisson counts -- confirm.
noise.var <- 0.25

# Anscombe-type transform of the high-noise counts.
y.transformed <- sqrt(tio2.data$hi.noise + (3.0 / 8.0))

# NOTE(review): the explicit inverse is kept to preserve the numerical
# results exactly; a linear solve would avoid forming it.
M <- K %*% solve(K + noise.var * diag(N))
f.mean <- M %*% y.transformed

# The full covariance matrix would be (diag(N) - M) %*% K, but only its
# diagonal is needed for POINTWISE uncertainty.  Entry i of that diagonal is
# the sum over j of (diag(N) - M)[i, j] * K[j, i], which is what the row sums
# of the ELEMENT-WISE product ('*', not '%*%') with t(K) compute.  (A full
# analysis would of course carry more information.)
Var <- rowSums((diag(N) - M) * t(K))
sigma <- sqrt(Var)

# Map the mean and the +/- 1-sigma curves through GaussToPoisson().
# NOTE(review): presumably this converts back to the scale of the counts --
# confirm against the legacy code.
denoised <- data.frame(
  Q = tio2.data$Q,
  best = GaussToPoisson(f.mean),
  plus.1.sigma = GaussToPoisson(f.mean + sigma),
  minus.1.sigma = GaussToPoisson(f.mean - sigma)
)
write.table(denoised, file = "denoising_results.txt", quote = FALSE,
  sep = "\t", row.names = FALSE)
