#!/usr/bin/R

library("aws")
library("png")
#library("debug")
#library("dtt")

loadXRayData <- function() {
  x <- getwd()
  source("../Igor_Xray/Xray.R")
  setwd(x)
  source("GPPois.R")
}

######################################################################
# HELPER FUNCTIONS

# Current timestamp: YYYYMMDDhhmmss
quickTimestamp <- function(fmt="%Y%m%d_%H%M%S") {
  return (format(Sys.time(), fmt))
}

# Average a matrix over a given binsize (ignoring points that "fall off the edge").
bin.matrix <- function(mat, bin.w) {
  avg.vec <- rep(1/bin.w,bin.w)
  n.col <- trunc(ncol(mat)/bin.w)
  n.row <- trunc(nrow(mat)/bin.w)
  # WHAT IS N AND f?
  B.col <-   matrix(c(rep(c(avg.vec,rep(0,n.col*bin.w)),n.col-1),avg.vec), ncol=n.col)
  B.row <- t(matrix(c(rep(c(avg.vec,rep(0,n.row*bin.w)),n.row-1),avg.vec), ncol=n.row))
  return (B.row %*% mat[1:(n.row*bin.w), 1:(n.col*bin.w)] %*% B.col)
}

# The "width" around a point is the set of all points (within the bounds of X)
# closer to that point than any other.
widths <- function(X) {
  N <- length(X)
  return (diff(c(X[1], (X[1:(N-1)] + X[2:N])*0.5, X[N])))
}

# Calculate dimensionless distances: D(i,j) is distance from i to j, in units
# of ell(j).
DimensionlessDistances <- function(X, ell) {
  N <- length(X)
  DD <- matrix(0, nrow=N, ncol=N)
  for (i in 2:N) {
    for (j in 1:(i-1)) {
      Diff <- abs(X[i] - X[j])
      DD[i,j] <- Diff / ell[j]
      DD[j,i] <- Diff / ell[i]
    }
  }
  return (DD)
}

indicesInRange <- function(v, Min, Max) {
  return (which(v %in% subset(v, v >= Min & v <= Max)))
}

# Calculate reduced hypocrisy density: useful for calculating both total
# hypocrisy and its derivatives.  (And calculate total hypocrisy as we go.
reducedHypocrisyDensity <- function(X, ell, DD, DD2, expDD2) {
  N <- length(X)
  prefactor <- function(i,j) return (1/(expDD2[i,j] + DD[i,j] - 1))
  ell2 <- ell^2
  h <- matrix(0, nrow=N, ncol=N)
  H <- 0
  dX <- widths(X)
  for (i in 2:N) {
    for (j in 1:(i-1)) {
      Diff <- ell[i] - ell[j]
      h[i, j] <-  prefactor(i,j) * Diff / ell2[j]
      h[j, i] <- -prefactor(j,i) * Diff / ell2[i]
      H <- H + dX[i] * dX[j] * Diff * (h[i,j] - h[j,i])
    }
  }
  return (list(H=H, h=h))
}

hypocrisyAndGradient <- function(X, ell, tauSq) {
  N <- length(X)
  DD <- DimensionlessDistances(X=X, ell=ell)
  DD2 <- DD^2
  expDD2 <- exp(DD2)

  # Calculate reduced hypocrisy density: useful for calculating both total
  # hypocrisy and its derivatives.  (And calculate total hypocrisy as we go.
  prefactor <- function(i,j) return (1/(expDD2[i,j] + DD[i,j] - 1))
  dX <- widths(X)
  rhd <- reducedHypocrisyDensity(X=X, ell=ell, DD=DD, DD2=DD2, expDD2=expDD2)
  h <- rhd$h
  H <- rhd$H / (2 * tauSq)

  # Finally, calculate the gradient
  grad <- 0 * ell # copy structure
  for (k in 1:N) {
    i <- (1:N)[-k]
    grad[k] <- grad[k] + dX[k] * sum(dX[i] * (
        2*(h[k,i] - h[i,k]*ell[i]/ell[k]) +
        (ell[i]-ell[k])*h[i,k] *
        (2*DD2[i,k] + DD[i,k]/expDD2[i,k])/(ell[k]*(1 + (DD[i,k]-1)/expDD2[i,k]))
        ))
  }
  grad <- grad / (2 * tauSq)

  return (list(hyp=H, grad=grad))
}

# The idea here is to interpolate X such that the hypocrisy integral approaches
# its value for the continuum limit.  'maxWidth' is the maximum width we'll
# allow between datapoints, in terms of the average ell-value in that region.
continuousNormedHypocrisy <- function(X, ell, maxWidth=1.0) {
  # First: find what X-values to interpolate at
  N <- length(X)
  DX <- diff(X)
  NW <- 2*DX/(ell[-1] + ell[-N])
  nInterp <- ceiling(NW/maxWidth)
  Xout <- X[1]
  for (i in 1:(length(X)-1)) {
    Xout <- c(Xout, X[i] + (1:nInterp[i]) * DX[i] / nInterp[i])
  }
  # Now calculate the hypocrisy
  ellInterp <- spline(method="natural", x=X, xout=Xout, y=ell)$y
  return (hypocrisyAndGradient(Xout, ellInterp, tauSq=1)$hyp)
}

continuousHypocrisy <- function(X, ell, tauSq) {
  return (continuousNormedHypocrisy(X=X, ell=ell)/(2*tauSq))
}

numericalHypocrisyGradient <- function(X, ell, tauSq) {
  # Calculate EMPIRICAL gradient
  myenv <- new.env()
  assign("X", X, envir=myenv)
  assign("ell", ell, envir=myenv)
  assign("tauSq", tauSq, envir=myenv)
  return (attr(numericDeriv(quote(hypocrisyAndGradient(X, ell, tauSq)$hyp), "ell", myenv), "gradient"))
}

testHypocrisyAndGradient <- function(X, ell) {
  HG <- hypocrisyAndGradient(X, ell, 1)

  searchDir <- rnorm(n=length(ell))
  alpha <- 1
  for (i in 1:12) {
    change <- (alpha*searchDir)
    actual <- hypocrisyAndGradient(X, ell + change, 1)$hyp - HG$hyp
    expected <- change %*% HG$grad
    cat(sprintf("Ratio: %8.6f (actual=%.2e)/(expected=%.2e)\n", actual/expected, actual, expected))
    alpha <- alpha / 10.0
  }
}

minusLogHypocrisy <- function(X, ell, tauSq) {
  logH <- 0
  N <- length(X)
  dX <- widths(X)
  ell2 <- ell^2
  for (i in 2:N) {
    for (j in 1:(i-1)) {
      DX <- abs(X[i]-X[j])
      CijjSqInvM1 <- exp((DX/ell[j])^2) - 1
      CjiiSqInvM1 <- exp((DX/ell[i])^2) - 1
      logH <- logH + (ell[i] - ell[j])^2 * dX[i] * dX[j] * (1/(CijjSqInvM1 * ell[j] + DX)^2 + 1/(CjiiSqInvM1 * ell[i] + DX)^2)
    }
  }
  return (-logH/(2*tauSq))
}

gradHypocrisyOnePoint <- function(X, ell, i, tauSq) {
  gHOP <- 0
  N <- length(X)
  dX <- widths(X)
  for (j in (1:N)[-i]) {
    DeltaX <- abs(X[i]-X[j])
    CjiiSqInv <- exp((DeltaX/ell[i])^2)
    CjiiSqInvM1 <- CjiiSqInv - 1
    CijjSqInvM1 <- exp((DeltaX/ell[j])^2) - 1
    gHOP <- gHOP + (ell[i] - ell[j]) * (
      1/(DeltaX + ell[i] * CjiiSqInvM1) +
      1/(DeltaX + ell[j] * CijjSqInvM1) +
      (ell[i] - ell[j]) * (1 + CjiiSqInv*((DeltaX/ell[i])^2-1)) / (DeltaX + ell[i] * CjiiSqInvM1)^2
      ) * dX[j]
  }
  return (dX[i] * gHOP / (2 * tauSq))
}

gradHypocrisy <- function(X, ell, tauSq) {
  hyp <- ell
  for (i in 1:length(X))
    hyp[i] <- gradHypocrisyOnePoint(X, ell, i, tauSq)
  return (hyp)
}

gradHypocrisyLogspace <- function(X, ell, tauSq) {
  return (ell * gradHypocrisy(X, ell, tauSq))
}

minusLogHypocrisyNewPoint <- function(ellNew, XNew, ell, X, tauSq=var(c(0,ell))) {
  # Insert new point in the right place
  cutoff <- length(which(X < XNew))
  N <- length(X)
  if (cutoff == 0) {
    totalX <- c(XNew, X)
    totalEll <- c(ellNew, ell)
  } else if (cutoff == N) {
    totalX <- c(X, XNew)
    totalEll <- c(ell, ellNew)
  } else {
    totalX <- c(X[1:cutoff], XNew, X[(cutoff+1):N])
    totalEll <- c(ell[1:cutoff], ellNew, ell[(cutoff+1):N])
  }
  return (minusLogHypocrisy(totalX, totalEll, tauSq))
}

minimumHypocrisyInterpolant <- function(X, ell, XNew) {
  ellRange <- range(ell)
  guessRange <- mean(ellRange) + c(-1,1)*diff(ellRange)
  tol <- 0.001*ellRange/length(XNew)
  ellNew <- c()
  for (Xx in XNew) {
    Xi <- which(X==Xx)
    if (length(Xi) > 0) {
      ellNew <- c(ellNew, ell[Xi])
    } else {
      opt <- optimize(f=minusLogHypocrisyNewPoint, interval=guessRange, maximum=T, tol=tol,
        XNew=Xx, ell=ell, X=X, tauSq=1)
      ellNew <- c(ellNew, opt$maximum)
    }
  }
  return (ellNew)
}

# http://r.789695.n4.nabble.com/Find-the-closest-value-in-a-list-or-matrix-td838131.html 2011-04-22
indexOfClosestValue <- function(myVec, value) {
  return (min(which(abs(myVec-value) == min(abs(myVec-value)))))
}

# Take a decimal representation of a number, and replace the "." with "p" to
# make nicer filenames
pointToP <- function(num, digitsAfterDot="") {
  point <- ifelse(nchar(digitsAfterDot) > 0, '.', '')
  fmtString <- sprintf("%s%s%sf", '%', point, digitsAfterDot)
  return (gsub("\\.", "p", sprintf(fmtString, num)))
}

# Same as above, but (S)cientific (N)otation
pointToPSN <- function(num, digitsAfterDot=2) {
  fmtstring <- sprintf("%s%de", '%.', digitsAfterDot)
  return (gsub("\\.", "p", sprintf(fmtstring, num)))
}

# The famed Anscombe transform
PoissonToGauss <- function(Y) {
  return (sqrt(Y + 3.0/8.0))
}

GaussToPoisson <- function(Y) {
  # Traditional inverse transform:
  return (Y ^ 2 - 1.0 / 8.0)
  ## Makitalo and Foi (2011):
  #Y <- pmax(0.6124, Y) # This function hits zero just below 0.6124 so any lower counts should be regarded as spurious
  #r32 <- sqrt(3/2)
  #Yinv <- Y^(-1)
  #return (Y^2 - 1/8 + r32*Yinv/8 - 11*(Yinv^2)/32 + 5*r32*(Yinv^3)/64)
}

MSRPG <- function(a,b) {
  return (mean((PoissonToGauss(a)-PoissonToGauss(b))^2))
}

MSR <- function(a,b) {
  return (mean((a-b)^2))
}

# Taken from http://www.phaget4.org/R/image_matrix.html 2011-01-06 15:35
myImagePlot <- function(x, ...){
  min <- min(x)
  max <- max(x)
  yLabels <- rownames(x)
  xLabels <- colnames(x)
  title <-c()
  # check for additional function arguments
  if( length(list(...)) ){
    Lst <- list(...)
    if( !is.null(Lst$zlim) ){
      min <- Lst$zlim[1]
      max <- Lst$zlim[2]
    }
    if( !is.null(Lst$yLabels) ){
      yLabels <- c(Lst$yLabels)
    }
    if( !is.null(Lst$xLabels) ){
      xLabels <- c(Lst$xLabels)
    }
    if( !is.null(Lst$title) ){
      title <- Lst$title
    }
  }
  # check for null values
  if( is.null(xLabels) ){
    xLabels <- c(1:ncol(x))
  }
  if( is.null(yLabels) ){
    yLabels <- c(1:nrow(x))
  }
  layout(matrix(data=c(1,2), nrow=1, ncol=2), widths=c(4,1), heights=c(1,1))
  # Red and green range from 0 to 1 while Blue ranges from 1 to 0
  ColorRamp <- rgb( seq(0,1,length=256),  # Red
    seq(0,1,length=256),  # Green
    seq(1,0,length=256))  # Blue
  ColorLevels <- seq(min, max, length=length(ColorRamp))
  # Reverse Y axis
  reverse <- nrow(x) : 1
  yLabels <- yLabels[reverse]
  x <- x[reverse,]
  # Data Map
  par(mar = c(3,5,2.5,2))
  image(1:length(xLabels), 1:length(yLabels), t(x), col=ColorRamp, xlab="",
    ylab="", axes=FALSE, zlim=c(min,max))
  if( !is.null(title) ){
    title(main=title)
  }
  axis(BELOW<-1, at=1:length(xLabels), labels=xLabels, cex.axis=0.7)
  axis(LEFT <-2, at=1:length(yLabels), labels=yLabels, las= HORIZONTAL<-1,
    cex.axis=0.7)
  # Color Scale
  par(mar = c(3,2.5,2.5,2))
  image(1, ColorLevels,
    matrix(data=ColorLevels, ncol=length(ColorLevels),nrow=1),
    col=ColorRamp,
    xlab="",ylab="",
    xaxt="n")
  layout(1)
}

# Return "traffic light" color gradient (0 is red, 1 is green)
goodnessColor <- function(frac) {
  myColorFunction <- colorRamp(colors=c(
      rgb(1.0, 0.0, 0.0),
      rgb(0.9, 0.9, 0.0),
      rgb(0.0, 1.0, 0.0)),
      space="rgb", interpolate="spline")
  return (rgb(myColorFunction(frac), maxColorValue=255))
}

quickGaussianBlur <- function(Y, X=1:length(Y), Xout=X, constSigma=1, Sigma=rep(constSigma,length(Y)), wordy=T) {
  NOut <- length(Xout)
  if (wordy) pb <- txtProgressBar(min = 0, max = NOut, style = 3)
  Sigma <- spline(method="natural", x=X, xout=Xout, y=Sigma)$y
  if (min(Sigma) == 0)
    return (Y)
  newY <- 0*Xout
  N <- length(Y)
  for (i in 1:NOut) {
    gauss <- exp(-(X-Xout[i])^2/(2*Sigma[i]^2))
    gauss <- gauss / sum(gauss)
    newY[i] <- gauss %*% Y
    if (wordy) setTxtProgressBar(pb, i)
  }
  if (wordy) close(pb);
  return (newY)
}

blurForScaleSpace <- function(Y, Sigma, sigCutoff=5) {
  N <- length(Y)
  YAug <- c(rev(Y[-1]), Y, rev(Y[-N]))
  NMax <- min(sigCutoff * Sigma + 1, N)
  tVal <- Sigma^2
  gauss <- exp(-(0:(NMax-1))^2/(2*tVal))/(sqrt(2*pi*tVal))
  manualGauss <- c(rev(gauss[-1]), gauss)
  manualBlur <- Y
  for (i in 1:N)
    manualBlur[i] <- manualGauss %*% YAug[N+i+(-NMax:(NMax-2))]
  return (manualBlur)
}

# 'Y' is a dataset smoothed at a scale of sqrt(t) for some 't' (not given
# here).  This function finds all the bright blobs at this level, returning the
# min and max pixel indices, together with the integrated area.
brightBlobList1D <- function(Y) {
  N <- length(Y)
  xMin <- xMax <- area <- c()
  Sum <- Cnt <- Base <- 0
  for (i in 1:N) {
    atALocalMin <- (i == N || Y[i] <= Y[i+1]) && (i == 1 || Y[i] <= Y[i-1])
    wentBelowBase <- (Y[i] <= Base)
    # Should we end this blob?
    if (wentBelowBase || atALocalMin) {
      # First: if we turned around before dipping down to the previous base
      # level, we have to recalculate which pixels should be included in the
      # blob, by backtracking.  (Here, Y[i] plays the role of the new Base)
      if (atALocalMin && !wentBelowBase && Cnt > 0) {
        Sum <- Cnt <- 0
        for (j in (i-1):1) {
          if (Y[j] < Y[i]) break
          Sum <- Sum + Y[j] - Y[i]
          Cnt <- Cnt + 1
        }
      }
      # Now, assuming we have a nonzero region, add it to the list
      if (Cnt > 0) {
        xMin <- c(xMin, i - Cnt)
        xMax <- c(xMax, i -  1 )
        area <- c(area, Sum)
      }
      # Reset key variables for next iteration
      Sum <- Cnt <- 0
      Base <- Y[i]
    }
    # Should we add this to the current blob?
    else {
      Sum <- Sum + Y[i] - Base
      Cnt <- Cnt + 1
    }
  }
  return (data.frame(xMin=xMin, xMax=xMax, area=area))
}

# Assumes Y is centered around zero.  R is Range of Y data.
plotBlobList1D <- function(X, Y, blobList, R, tVal) {
  myDX <- mean(diff(X))
  plot(xlim=range(X), ylim=c(0, 2*R), xlab="Q (1/A)", ylab="Gradient", main=sprintf("Bright blobs at scale %d", tVal), NULL)
  abline(h=0, col="black")
  dataBaseline <- 1.5*R
  abline(h=dataBaseline, col="gray")
  for (i in 1:nrow(blobList)) {
    xI <- c(blobList$xMin[i], blobList$xMax[i])
    meanHeight <- blobList$area[i] / (diff(range(xI)) + 1)
    points(col="blue", type="b", X[xI], rep(meanHeight, 2))
  }
  points(X, dataBaseline + Y, type="l", col="green")
}

# List scale space features.  Assumes equally spaced points.
scaleSpaceFeatures1D <- function(X=1:length(Y), Y, tVals) {
  nT <- length(tVals)
  N <- length(Y)
  dX  <- 0.5 * ( X[-1] +  X[- N   ])
  DdX <- diff(dX)
  dX2 <- 0.5 * (dX[-1] + dX[-(N-1)])
  SSI <- matrix(nrow=nT, ncol=N)
  L <- matrix(nrow=nT, ncol=N-2)
  yGuess <- max(abs(Y)) * c(-1,1)
  encounteredNoise <- FALSE
  lastMaxima <- N
  for (tI in 1:length(tVals)) {
    if (encounteredNoise) {
      L <- L[1:(tI-1),]
      tVals <- tVals[1:(tI-1)]
      break
    }
    tVal <- tVals[tI]
    SSI[tI,] <- blurForScaleSpace(Y, sqrt(tVal))
    #L[tI,] <- tVal * diff(diff(SSI[tI,]))
    L[tI,] <- tVal * diff(diff(SSI[tI,])/diff(X))/DdX
    nMaxima <- 0
    for (i in 2:(N-3))
      if (L[tI,i] < L[tI  ,i-1] && L[tI,i] < L[tI  ,i+1])
        nMaxima <- nMaxima + 1
    #LScale <- max(abs(L[1,]))/max(yGuess)
    #png(filename=sprintf("ASDF_t-%08d.png", tVal), width=800, height=800)
    #plot(main=sprintf("Scale-space evolution; t=%d", tVal), ylim=yGuess, dX2, L[tI,]/LScale, type="l", col="blue"); abline(h=0)
    #points(X, SSI[tI,], type="l", col="red")
    #legend(x="topleft", legend=c("Function", "Scale-normed Laplacian"), pch=rep(-1,2), lty=rep(1,2), col=c("red", "blue"))
    #dev.off()
    encounteredNoise <- nMaxima > lastMaxima
    lastMaxima <- nMaxima
  }
  Qi <- sqrtT <- c()
  QiMax <- sqrtTMax <- c()
  for (tI in 2:(length(tVals)-1)) {
    for (i in 2:(N-3)) {
      if (
        L[tI,i] < L[tI  ,i-1] &&
        L[tI,i] < L[tI  ,i+1]) {
        Qi    <- c(Qi   ,       i )
        sqrtT <- c(sqrtT, sqrt(tVals[tI]))
        if (
          L[tI,i] < L[tI-1,i-1] &&
          L[tI,i] < L[tI-1,i  ] &&
          L[tI,i] < L[tI-1,i+1] &&
          L[tI,i] < L[tI+1,i-1] &&
          L[tI,i] < L[tI+1,i  ] &&
          L[tI,i] < L[tI+1,i+1]) {
          QiMax    <- c(QiMax   ,              i )
          sqrtTMax <- c(sqrtTMax, sqrt(tVals[tI]))
          }
        }
    }
  }
  TS <- quickTimestamp()
  R <- diff(range(Y))
  #for (tI in 1:(length(tVals)-1)) {
  # tVal <- tVals[tI]
  # png(filename=sprintf("testPlotBlobs_%s_sqrtT-%04d.png", TS, sqrt(tVal)), width=800, height=800)
  # plotBlobList1D(X=X, Y=SSI[tI,], blobList=brightBlobList1D(SSI[tI,]), R=R, tVal=tVal)
  # dev.off()
  #}
  return (list(allCurves=data.frame(Qi=Qi, sqrtT=sqrtT), scaleMax=data.frame(Qi=QiMax, sqrtT=sqrtTMax)))
}

bubbleData <- function(X, Y, tVals) {
  ssData <<- scaleSpaceFeatures1D(X, Y, tVals)$scaleMax
  myResults <- cbind(ssData, Qmin=0, Qmax=0, area=0)
  for (sqrtT in unique(ssData$sqrtT)) {
    YBlur <- blurForScaleSpace(Y, sqrtT)
    bbl <<- brightBlobList1D(YBlur)
    for (i in which(ssData$sqrtT == sqrtT)) {
      # Find the corresponding grey blob
      j <- intersect(which(ssData$Qi[i] <= bbl$xMax), which(ssData$Qi[i] >= bbl$xMin))
      if (length(j) > 1) cat ("Too many!\n")
      if (length(j) < 1) {
        cat (sprintf("Too few!  Qi=%d, sqrt(t)=%d\n", ssData$Qi[i], sqrtT))
        next
      }
      myResults$area[i] <- bbl$area[j]
      myResults$Qmin[i] <- bbl$xMin[j]
      myResults$Qmax[i] <- bbl$xMax[j]
    }
  }
  #myResults$Qi <- X[myResults$Qi]
  #myResults$Qmin <- X[myResults$Qmin]
  #myResults$Qmax <- X[myResults$Qmax]
  return (myResults)
}

plotScaleSpaceFeatures1D <- function(ssPts, X, Y, baseFName="ssFeatures", ell, id) {
  scaleFactor <- mean(diff(X))
  sqrtTMax <- ssPts$scaleMax$sqrtT * scaleFactor
  sqrtT <- ssPts$allCurves$sqrtT * scaleFactor
  xR <- range(X)
  yMax <- 2*ell
  TR <- 0.5 # [T]op [R]egion height
  yR <- c(0, yMax*(1+TR))
  png(filename=sprintf("%s_%04d.png", baseFName, id), width=800, height=800)
  plot(NULL, xlim=xR, ylim=yR, xlab="Q (1/A)", ylab="sqrt(t) (1/A)",
    main=sprintf("Scale-space features; dataset %d", id))
  for (i in 1:length(ssPts$scaleMax$Qi))
    points(type="l", X[ssPts$scaleMax$Qi[i]] + c(-1,1)*sqrtTMax[i], rep(sqrtTMax[i],2), lwd=2, col="red")
  points(type="p", X[ssPts$allCurves$Qi], sqrtT)
  YRange <- range(Y)
  YSquish <- (TR*(Y-YRange[1])/diff(YRange) + 1) * yMax
  points(type="l", col="gray", X, YSquish)
  dev.off()
}

outputForGnuplot <- function(myData, fName, SD=4, rowsAtATime=20) {
  DF <- as.data.frame(myData)
  myStr <- paste(colnames(myData), collapse='\t')
  system(sprintf("echo -n \"%s\" > %s", myStr, fName))
  myStr <- ""
  for (Row in 1:nrow(myData)) {
    myStr <- sprintf("%s\n%s", myStr, paste(signif(myData[Row,], SD), collapse='\t'))
    if (Row %% rowsAtATime == 0) {
      system(sprintf("echo -n \"%s\" >> %s", myStr, fName))
      myStr <- ""
    }
  }
  system(sprintf("echo \"%s\" >> %s", myStr, fName))
}

# Evaluate the trace of the product of 2 matrices, by ONLY evaluating the
# diagonal elements of that product
smartTrace <- function(M1, M2) {
  N <- dim(M1)[1];
  tr <- 0;
  for (i in 1:N) {
    for (j in 1:N) {
      tr <- tr + M1[i,j] * M2[j,i];
    }
  }
  return (tr);
}

carefulChol <- function(K, maxEpsilon=1e-2, minEpsilon=1e-20, numTries=17) {
  numNA <- sum(is.na(K))
  if (numNA > 0) stop ("This is the REAL problem: there are NA values in K\n")
  baseVal <- min(diag(K))
  KChol <- try(chol(K), silent=T)
  epsilon <- minEpsilon
  while (!is.null(attr(KChol, "class")) && attr(KChol, "class") == "try-error" && epsilon <= maxEpsilon) {
    KChol <- try(chol(K + baseVal * epsilon * diag(nrow(K))), silent=T)
    epsilon <- epsilon * 10
  }
  if (epsilon >= maxEpsilon) stop("We just couldn't Cholesky-decompose this matrix\n")
  return (KChol)
}

# Assumes the data Y from datapoints X are Poisson distributed.  Bins them into
# pixels such that the relative uncertainty for each pixel is below epsilon (or
# equivalently, each pixel has at least NThresh counts).
adaptivePoissonDataBin <- function(X, Y, epsilon=1e-2, NThresh=1/sqrt(epsilon)) {
  N <- length(X)
  numPix <- weightedXSum <- totCnts <- 0
  XStart <- XBin <- YBin <- nBin <- c()
  firstX <- X[1]
  for (i in 1:N) {
    totCnts <- totCnts + Y[i]
    weightedXSum <- weightedXSum + X[i] * Y[i]
    numPix <- numPix + 1
    if (totCnts > NThresh) {
      XBin <- c(XBin, weightedXSum/totCnts)
      YBin <- c(YBin, totCnts)
      nBin <- c(nBin, numPix)
      XStart <- c(XStart, firstX)
      firstX <- X[i]
      numPix <- weightedXSum <- totCnts <- 0
    }
  }
  return (list(X=XBin, Y=YBin, n=nBin, XL=XStart))
}

plotAdaptivelyBinnedData <- function(X, Y, NThresh) {
  ABD <- adaptivePoissonDataBin(X, Y, NThresh=NThresh)
  XL <- ABD$XL
  XR <- c(XL[2:length(XL)], X[length(X)])
  png(filename=sprintf("TestAdaptiveBinning_N-%06d.png", NThresh), width=800, height=800)
  plot(X,Y,type="l", col="gray", main=sprintf("Adaptive binning; threshold=%d", NThresh), xlab="Q (1/A)", ylab="Intensity (no. of counts)")
  for (i in 1:length(XL)) {
    points(type="l", col="red", c(XL[i],XR[i]), rep(ABD$Y[i]/ABD$n[i], 2))
    points(type="p", col="red", ABD$X[i], ABD$Y[i]/ABD$n[i])
  }
  dev.off()
}

# Find out which n.dsets elements from dataset.list add up to give mystery.data.
which.n.datasets <- function(n.dsets, dataset.list, mystery.data, subset=1:20) {
  N <- length(dataset.list)
  # Answers the question: are there 'n' functions, having index 'i.start' or
  # higher, which sum to 'total'?
  recurse.check <- function(n, i.start, total) {
    if (i.start > N) return (list(success=FALSE, i=c()))
    if (n>2) cat(sprintf("Are there %d datasets, indexed %d or higher, that will work?\n", n, i.start))
    if (n==0)  {
      if (sum(total[subset]^2)==0) return (list(success=TRUE, i=c()))
      return (list(success=FALSE, i=c()))
    }
    for (i in i.start:N) {
      RC <- recurse.check(n=n-1, i.start=i+1, total=total[subset]-dataset.list[[i]][subset])
      if (RC$success) return (list(success=TRUE, i=c(i.start, RC$i)))
    }
    return (list(success=FALSE, i=c()))
  }
  RC <- recurse.check(n=n.dsets, i.start=1, total=mystery.data)
  if (RC$success) {
    cat("FOUND IT!\n")
  } else {
    cat("Too sad. :(\n")
  }
  return (RC$i)
}

combine.n <- function(clobber=TRUE, datasets, goal, num, my.choices=combn(1:ncol(datasets), num)) {
  if (clobber) compat.with.n1 <<- vector(length=ncol(my.choices), "numeric")
  pb <- txtProgressBar(min = 0, max = ncol(my.choices), style = 3)
  for (i in 1:ncol(my.choices)) {
    setTxtProgressBar(pb, i)
    i.plausible <- 0
    for (pixel in 2100:2120) {
      score <- sum(datasets[my.choices[,i],pixel])-goal[pixel]
      if (score != 0) break
      i.plausible <- pixel-2100+1
    }
    if (i.plausible > 2)
      cat(sprintf("Combination %d was cool, up to %d pixels.\n", i, i.plausible))
  }
  close(pb);
}


######################################################################
# PROBABILITY AND COVARIANCE FUNCTIONS
#
# This section gives all the functions for calculating probability-related
# quantities: covariance matrices, marginal likelihood and derivatives, and the
# like.

hypersForSEFullResEll <- function(logEll, sigmaFSq, sigmaNSq=0.25) {
  return (list(logEll=logEll, sigmaFSq=sigmaFSq, logSigmaNSq=log(sigmaNSq)))
}

hypersForSE <- function(ell=NULL, logEll=log(ell), sigmaFSq=NULL, logSigmaFSq=log(sigmaFSq), sigmaNSq=0.25, logSigmaNSq=log(sigmaNSq)) {
  names(logEll) <- names(logSigmaFSq) <- names(logSigmaNSq) <- c()
  return (list(logEll=logEll, logSigmaFSq=logSigmaFSq, logSigmaNSq=logSigmaNSq))
}

# Basically the arguments are a COPY-PASTE of the above function... not very robust,
# but should get the job done for now!!
hypersForSEMultiDim  <- function (ell=NULL, logEll=log(ell), sigmaFSq=NULL, logSigmaFSq=log(sigmaFSq), sigmaNSq=0.25, logSigmaNSq=log(sigmaNSq)) {
  return (hypersForSE(ell=ell, logEll=logEll, sigmaFSq=sigmaFSq, logSigmaFSq=logSigmaFSq, sigmaNSq=sigmaNSq, logSigmaNSq=logSigmaNSq))
}

hypersFromParamsSE <- function(params, constParams=c()) {
  allParams <- c(params, constParams)
  names(allParams) <- c(names(params), names(constParams))
  # "sum" looks a bit weird below; the only reason I'm using it is to get rid
  # of the distracting "names" attribute
  hypers <- list(
    logEll=sum(allParams["logEll"]),
    logSigmaFSq=sum(allParams["logSigmaFSq"]),
    logSigmaNSq=sum(allParams["logSigmaNSq"]))
  return(hypers)
}

# Noiseless covariance matrix for the SE (i.e. squared exponential, basically a
# Gaussian) covariance function
GramMatrixSE <- function(X, hypers, XOut=X) {
  twoEllSq <- 2 * exp(2 * hypers$logEll)
  sigmaFSq <- exp(hypers$logSigmaFSq)
  return (sigmaFSq * exp(-(outer(XOut, X, "-")^2)/twoEllSq))
}

# Type SEMultiDim
GramMatrixSEMultiDim <- function(X, hypers, XOut=X, wordy=T) {
  X <- as.matrix(X)
  XOut <- as.matrix(XOut)
  twoEllSq <- 2 * exp(2 * hypers$logEll)
  sigmaFSq <- exp(hypers$logSigmaFSq)
  covar <- function(i, j) {
    return (sigmaFSq * exp(-(sum((XOut[i,] - X[j,])^2))/twoEllSq))
  }
  GM <- matrix(NA, nrow=nrow(XOut), ncol=nrow(X))
  if (wordy) pb <- txtProgressBar(min = 0, max = nrow(XOut), style = 3)
  for (i in 1:nrow(XOut)) {
    if (wordy) setTxtProgressBar(pb, i)
    for (j in 1:nrow(X)) {
      GM[i,j] <- covar(i,j)
    }
  }
  if (wordy) close(pb);
  return (GM)
}

ellOfQAndSigmaFSq <- function(XOut, samplePoints, params, constParams=c(), logscale=T, returnCov=FALSE) {
  sigmaFSq <- mean(samplePoints[,"sigmaFSq"])
  hypers <- hypersFromParamsSE(params, constParams)
  K <- GramMatrixSE(X=samplePoints[,"X"], hypers=hypers)
  KInv <- solve(K + exp(params["logSigmaNSq"]) * diag(nrow(samplePoints)))
  K2 <- GramMatrixSE(X=samplePoints[,"X"], hypers=hypers, XOut=XOut)
  K2KInv <- K2 %*% KInv
  SP <- ifelse(test=rep(logscale,nrow(samplePoints)), yes=samplePoints[,"logEll"], no=samplePoints[,"ell"])
  ell <- K2KInv %*% SP
  if (logscale) ell <- exp(ell)
  # Also want to calculate the cholesky decomposition of the covariance matrix,
  # so we can draw samples from the distribution of ell(Q)
  L <- NULL
  if (returnCov) {
    Cov <- GramMatrixSE(X=XOut, hypers=hypers) - K2KInv %*% t(K2)
    L <- t(carefulChol(Cov))
  }
  return (list(X=XOut, ell=ell[,1], logEll=log(ell)[,1], L=L, sigmaFSq=sigmaFSq))
}

# Return the log of the marginal likelihood (ML)
logML <- function(hypers, KType, X, Y,
  KEnvir=constructKEnvir(X, KType, hypers)) {
  # First, setup by making sure we have all the right matrices
  KChol <- smartKChol(KEnvir, X, KType, hypers)
  KInv <- smartKInv(KEnvir, X, KType, hypers)

  # Get determinant by multiplying diagonal elements.  This is HALF the log of
  # det K, because det(KChol) = sqrt(det(K)).
  complexityTerm <- -sum(log(diag(KChol)))
  dataFitTerm <- -0.5 * t(Y) %*% KInv %*% Y
  numPointsTerm <- -0.5 * length(Y) * log(2*pi)

  return (dataFitTerm + complexityTerm + numPointsTerm)
}

logMLSE <- function(params, constParams=c(), KType="SE", X, Y,
  hypers=hypersFromParamsSE(params, constParams),
  KEnvir=constructKEnvir(X, KType, hypers)) {
  return (logML(hypers=hypers, KEnvir=KEnvir, KType=KType, X=X, Y=Y))
}

logMLSEVaryingNoise <- function(params, noise, KType="SEVaryingNoise", X, Y, KEnvir) {
  hypers <- hypersForSEFullResEll(logEll=params["logEll"], sigmaFSq=exp(params["logSigmaFSq"]), sigmaNSq=noise)
  return (logML(hypers=hypers, KEnvir=KEnvir, KType=KType, X=X, Y=Y))
}

gradLogMLForSEFullResEll <- function(hypers, KType="SEFullResEll", X, Y, indices=1:length(Y),
  KEnvir=constructKEnvir(X, KType, hypers)
  ) {
  N <- length(Y)
  alpha <- smartKInv(KEnvir, X, KType, hypers) %*% Y
  KNoNoise <- smartKNoNoise(KEnvir, X, KType, hypers)
  myMat <- alpha %*% t(alpha) - smartKInv(KEnvir, X, KType, hypers)
  ell <- exp(hypers$logEll)
  ell2 <- ell^2
  grad <- array(0, dim=length(indices))
  for (i in 1:length(indices)) {
    m <- indices[i]
    V <- KNoNoise[,m] * ((ell2 - ell2[m])/(2*(ell2 + ell2[m])) + 2*((X - X[m])^2*ell2[m])/((ell2 + ell2[m])^2))
    grad[i] <- myMat[m,] %*% V - 0.5 * myMat[m,m] * V[m]
  }
  return (grad)
}

erf <- function(x) 2*pnorm(sqrt(2)*x)-1
s<-function(x) erf(sqrt(pi)*x*0.5)
ds.1<-function(x) exp(-0.25*pi*x^2)
ellForAdHocWells <- function(X, ell0, params, g) {
  ell0 + 0.5*(ell0 - params["ellBottom"])*(s((X-params["xR"])/g) - s((X-params["xL"])/g))
}
logMLForAdHocWells <- function(X, Yg, ell0, params, g, sigmaFSq) {
  ell <- ellForAdHocWells(X=X, ell0=ell0, params=params, g=g)
  hypers <- hypersForSEFullResEll(logEll=log(ell), sigmaFSq=sigmaFSq)
  KType <- "SEFullResEll"
  KEnvir <- constructKEnvir(hypers=hypers,X=X,KType=KType)
  LML <- logML(hypers=hypers, KType=KType, X=X, Y=Yg, KEnvir=KEnvir)

  # Show progress
  elapsed <- as.numeric(quickTimestamp("%s")) - secBase
  png(filename=sprintf("AdHocWells_%s_region-%03d_%05d.png", TS, regionNum, myIter), width=800, height=800)
  par(mfrow=c(2,1))
  # First plot: ell(Q)
  plot(xlab=NA, ylab="ell", main=sprintf("Full-res ell(Q), t=%d sec, log(ML)=%.2f\n%s", elapsed, LML, TS), type="l", col="black", X, ell, ylim=c(0,ell0))
  # Second plot: function
  plot(main="Corresponding function fit", xlab="Q (1/A)", ylab="Counts", type="l", col="gray", X, GaussToPoisson(Yg))
  points(type="l", col="blue", X, GaussToPoisson(smartKnnKInv(KEnvir) %*% Yg))
  dev.off()
  myIter <<- myIter + 1

  cat(sprintf("(Reg. %2d, it. %3d:) (ell=%.2f, xL=%.2f, xR=%.2f) lnML: %-9.2f;  ", regionNum, myIter, params["ellBottom"], params["xL"], params["xR"], LML))
  return (LML)
}
grad.logMLForAdHocWells <- function(X, Yg, ell0, params, g, sigmaFSq) {
  ell <- ellForAdHocWells(X=X, ell0=ell0, params=params, g=g)
  hypers <- hypersForSEFullResEll(logEll=log(ell), sigmaFSq=sigmaFSq)
  grad.wrt.logEll <- gradLogMLForSEFullResEll(hypers=hypers, X=X, Y=Yg)
  partial.ellBottom <- -0.5*(s((X - params["xR"])/g) - s((X - params["xL"])/g))
  partial.xR <- -(0.5*(ell0 - params["ellBottom"])/g)*exp(-0.25*pi*((X - params["xR"])/g)^2)
  partial.xL <-  (0.5*(ell0 - params["ellBottom"])/g)*exp(-0.25*pi*((X - params["xL"])/g)^2)
  grad <- params # copy "names" structure
  grad["ellBottom"] <- sum(partial.ellBottom * grad.wrt.logEll / ell)
  grad["xR"] <- sum(partial.xR * grad.wrt.logEll / ell)
  grad["xL"] <- sum(partial.xL * grad.wrt.logEll / ell)

  cat(sprintf("Grad(lnML) = (ell=%.2f, xL=%.2f, xR=%.2f).\n", grad["ellBottom"], grad["xL"], grad["xR"]))
  return (grad)
}

genericXDistanceSq <- function(X) {
  X <- as.matrix(X)
  N <- nrow(X)
  distances <- matrix(NA, nrow=N, ncol=N)
  for (i in 1:N) {
    for (j in i:N) {
      distances[i,j] <- distances[j,i] <- sum((X[i,]-X[j,])^2)
    }
  }
  return (distances)
}

gradLogMLForSE <- function(params, constParams=c(), KEnvir=constructKEnvir(hypers=hypers,X=X,KType=KType), KType="SE", X, Y,
  hypers=get(sprintf("hypersFromParams%s", KType))(params, constParams)) {
  # First, setup by making sure we have all the right matrices
  KNoNoise <- smartKNoNoise(KEnvir, X, KType, hypers)
  KInv <- smartKInv(KEnvir, X, KType, hypers)

  # Generic matrix factor, common to derivatives for ALL hyperparameters
  alpha <- KInv %*% Y;
  matFactor <- alpha %*% t(alpha) - KInv;

  # Now, calculate derivatives w.r.t. log(parameters)
  grad <- params # Copy over the "names" structure
  if (length(which(names(grad)=="logEll")) == 1) {
    dQOverEllSq <- genericXDistanceSq(X)/(exp(params["logEll"]))^2
    grad["logEll"] <- 0.5 * smartTrace(matFactor, KNoNoise * dQOverEllSq)
  }
  if (length(which(names(grad)=="logSigmaFSq")) == 1) {
    grad["logSigmaFSq"] <- 0.5 * smartTrace(matFactor, KNoNoise)
  }
  if (length(which(names(grad)=="logSigmaNSq")) == 1) {
    grad["logSigmaNSq"] <- 0.5 * smartTrace(matFactor, exp(params["logSigmaNSq"])*diag(length(Y)))
  }
  return (grad)
}

gradLogMLForSEVaryingNoise <- function(params, noise, KType="SEVaryingNoise", X, Y, KEnvir) {
  hypers <- hypersForSEFullResEll(logEll=params["logEll"], sigmaFSq=exp(params["logSigmaFSq"]), sigmaNSq=noise)
  return (gradLogMLForSE(
      params=params, KEnvir=KEnvir, KType="SEVaryingNoise", X=X, Y=Y, hypers=hypers))
}

testDirecDeriv <- function(X, Y, ell=1, sigFSq=64^2) {
  params <- log(c(ell, sigFSq))
  names(params) <- c("logEll", "logSigmaFSq")
  constParams <- log(0.25)
  names(constParams) <- "logSigmaNSq"
  KEnvir <- constructKEnvir(X, "SE", hypersFromParamsSE(params, constParams))
  LML0 <- logMLSE(params=params, constParams=constParams, KEnvir=KEnvir, KType="SE", X=X, Y=Y)
  g <- gradLogMLForSE(params, constParams, KEnvir, "SE", X, Y)
  randomDir <- rnorm(n=2)
  DD <- randomDir %*% g
  alpha <- 1
  for (i in 1:10) {
    actual <- logMLSE(params=params+alpha*randomDir, constParams=constParams, KEnvir=KEnvir, KType="SE", X=X, Y=Y) - LML0
    expected <- DD * alpha
    cat(sprintf("Iteration %02d, ratio: %.7f.  (Actual %.2e; expected %.2e)\n", i, actual/expected, actual, expected))
    alpha <- alpha / 10.0
  }
}

# Noiseless covariance matrix for the SE generalized to accept a varying
# lengthscale
GramMatrixSEVaryingEll <- function(X, hypers, XOut=X, wordy=T) {
  # Construct varying-ell functions from hypers
  ellAndSigma <- ellOfQAndSigmaFSq(XOut=X, samplePoints=hypers$samplePoints, params=hypers$params)
  ellOut <- ell <- ellAndSigma$ell
  sigmaFSq <- ellAndSigma$sigmaFSq
  if (!identical(X, XOut))
    ellOut <- ellOfQAndSigmaFSq(XOut=XOut, samplePoints=hypers$samplePoints, params=hypers$params)$ell
  ell2 <- ell^2
  ellOut2 <- ellOut^2
  helper <- function(i, j) {
    sumEllSq <- ell2[j] + ellOut2[i]
    return (sigmaFSq * sqrt(2*ell[j]*ellOut[i] / sumEllSq) * exp(-(X[j]-XOut[i])^2/sumEllSq))
  }
  N <- length(X)
  NOut <- length(XOut)
  K <- matrix(0.0, nrow=NOut, ncol=N)
  if (wordy) pb <- txtProgressBar(min = 0, max = NOut, style = 3)
  for (i in 1:NOut) {
    if (wordy) setTxtProgressBar(pb, i)
    for (j in 1:N) {
      K[i, j] <- helper(i, j)
    }
  }
  if (wordy) close(pb);
  return (K)
}

# A squared exponential with "full-resolution" ell(Q), i.e. we keep track of
# every individual value of ell(Q)
GramMatrixSEFullResEll <- function(X, hypers, XOut=X) {
  N <- length(X)
  NOut <- length(XOut)
  K <- matrix(0.0, nrow=NOut, ncol=N)
  ell <- exp(hypers$logEll)
  ellOut <- exp(spline(method="natural", x=X, xout=XOut, y=hypers$logEll)$y)
  ell2 <- ell^2
  ellOut2 <- ellOut^2
  for (Row in 1:NOut) {
    for (Col in 1:N) {
      sumEllSq <- ellOut2[Row] + ell2[Col]
      K[Row, Col] <- hypers$sigmaFSq * sqrt(2*ellOut[Row]*ell[Col]/sumEllSq) * exp(-(XOut[Row]-X[Col])^2/sumEllSq)
    }
  }
  return (K)
}

GramMatrixSEVaryingNoise <- function(X, hypers, XOut=X) {
  return (GramMatrixSE(X=X, XOut=XOut,
      hypers=list(logEll=hypers$logEll, logSigmaFSq=log(hypers$sigmaFSq))))
}

# Sum of covariance matrixes with different hyperparams, masked off so they're
# each confined to different regions
GramMatrixMaskedRegions <- function(X, hypers, XOut=X) {
  K.nn <- matrix(0, nrow=length(XOut), ncol=length(X))
  for (i in 1:nrow(hypers$contribs)) {
    pp <- hypers$contribs[i,]
    # Cutoff at +- s(5), because this is indistinguishable from zero
    CUT <- 5
    J <- indicesInRange(v=X, Min=pp["xL"]-CUT*pp["ell"], Max=pp["xR"]+CUT*pp["ell"])
    K <- indicesInRange(v=XOut, Min=pp["xL"]-CUT*pp["ell"], Max=pp["xR"]+CUT*pp["ell"])
    hyps <- hypersForSE(ell=pp["ell"], sigmaFSq=pp["sigmaFSq"])
    K.local <- GramMatrixSE(X=X[J], hypers=hyps, XOut=XOut[K])
    mask.vals     <- 0.5*(s((X[J] - pp["xL"])/pp["ell"]) - s((X[J] - pp["xR"])/pp["ell"]) )
    mask.vals.Out <- 0.5*(s((XOut[K] - pp["xL"])/pp["ell"]) - s((XOut[K] - pp["xR"])/pp["ell"]) )
    mask <- outer(mask.vals.Out, mask.vals)
    K.nn[K,J] <- K.nn[K,J] + mask * K.local
  }
  return (K.nn)
}

# Sum of a bunch of different covariance matrices
GramMatrixSum <- function(X, hypers, XOut=X) {
  X <- as.matrix(X)
  XOut <- as.matrix(XOut)
  K <- matrix(0, nrow=nrow(XOut), ncol=nrow(X))
  for (i in 1:length(hypers$contribs)) {
    KType <- hypers$contribs[[i]][["KType"]]
    hyps <- hypers$contribs[[i]][["hypers"]]
    GramMatrixFunction <- get(sprintf("GramMatrix%s", KType))
    K <- K + GramMatrixFunction(X=X, hypers=hyps, XOut=XOut)
  }
  return (K)
}

# Return the matrix to give the denoised function vs. XOut, when multiplied by
# noisy data vs. X.
interpolatedFullResEllMatrix <- function(X, hypers, XOut, KType="SEFullResEll",
  KEnvir=constructKEnvir(X, KType, hypers)) {
  K2 <- GramMatrixSEFullResEll(X=X, hypers=hypers, XOut=XOut)
  return (K2 %*% smartKInv(KEnvir, X, KType, hypers))
}

######################################################################
# SMART K-CALCULATING FUNCTIONS
#
# The point of these functions is to avoid lots of very expensive unnecessary
# recomputation.  By constructing an environment, and passing it around, we can
# store results of large matrix computations and make them available to
# different functions.
#   Two main pieces here:
# 1) Core functions: checking whether all the supplied parameters are equal to
#    the values in the environment, as well as constructing the environment in
#    the first place
# 2) Interface functions: smartK, smartKInv, etc. will use the checking
#    functions, and also perform the computations iff necessary

# This function is the canonical list of names of objects which might be stored
# in a KEnvir.
objectsInKEnvir <- function() {
  return (c("K", "KNoNoise", "KChol", "KInv", "KnnKInv", "KnnInv", "L"))
}

# Construct an environment to hold a K-matrix, and other matrices calculated
# from it
constructKEnvir <- function(X, KType, hypers) {
  KEnvir <- new.env()
  objNames <- objectsInKEnvir()
  updated <- rep(FALSE, length(objNames))
  names(updated) <- objNames
  assign("X", X, envir=KEnvir)
  assign("KType", KType, envir=KEnvir)
  assign("hypers", hypers, envir=KEnvir)
  assign("updated", updated, envir=KEnvir)
  return (KEnvir)
}

# Checks whether any quantities determining the K-matrices have changed.  SIDE
# EFFECT: if they *have* changed, set the status of each of them to
# "non-updated"
updateKStatus <- function(KEnvir, X, KType, hypers) {
  samePoints <- (identical(X, get(x="X", envir=KEnvir)))
  sameType <- (identical(KType, get(x="KType", envir=KEnvir)))
  sameHypers <- (identical(hypers, get(x="hypers", envir=KEnvir)))
  unchanged <- (samePoints && sameType && sameHypers)
  if (!unchanged) {
    updated <- get(x="updated", envir=KEnvir)
    updated[] <- FALSE
    assign("X", X, envir=KEnvir)
    assign("KType", KType, envir=KEnvir)
    assign("hypers", hypers, envir=KEnvir)
    assign("updated", updated, envir=KEnvir)
  }
  return (unchanged)
}

upToDate <- function(KEnvir, name) {
  updated <- get(x="updated", envir=KEnvir)
  return (updated[name])
}

# It may seem odd to have a special function to perform this trivial task.  But
# beware!  We may have an outdated copy of the "updated" vector hanging around.
# We need to make sure we don't mark any very-recently-updated matrices as
# "non-updated", due to our old information!
updateOneMatrix <- function(KEnvir, name, mat) {
  updated <- get(x="updated", envir=KEnvir)
  updated[name] <- TRUE
  assign("updated", updated, envir=KEnvir)
  assign(name, mat, envir=KEnvir)
}

# Calculate the (noise-free) Gram Matrix of the requested type (KType),
# evaluated at the points in X with hyperparameters given by hypers.  What
# makes it "smart" is that it checks whether this has already been done, and if
# so it returns the old matrix instead of wastefully recomputing it.
smartKNoNoise <- function(KEnvir, X=get(x="X", envir=KEnvir), KType=get(x="KType", envir=KEnvir), hypers=get(x="hypers", envir=KEnvir)) {
  updateKStatus(KEnvir, X, KType, hypers)
  if (!upToDate(KEnvir, "KNoNoise")) {
    GramMatrixFunction <- get(sprintf("GramMatrix%s", KType))
    KNoNoise <- GramMatrixFunction(X=X, hypers=hypers)
    updateOneMatrix(KEnvir, "KNoNoise", KNoNoise)
  } else {
    KNoNoise <- get(x="KNoNoise", envir=KEnvir)
  }
  return (KNoNoise)
}

# The inverse of the NOISELESS matrix
smartKnnInv <- function(KEnvir, X=get(x="X", envir=KEnvir), KType=get(x="KType", envir=KEnvir), hypers=get(x="hypers", envir=KEnvir)) {
  updateKStatus(KEnvir, X, KType, hypers)
  if (!upToDate(KEnvir, "KnnInv")) {
    KnnInv <- robustSolve(smartKNoNoise(KEnvir))
    updateOneMatrix(KEnvir, "KnnInv", KnnInv)
  } else {
    KnnInv <- get(x="KnnInv", envir=KEnvir)
  }
  return (KnnInv)
}

# Calculate the (noisy) Gram Matrix of the requested type (KType), evaluated at
# the points in X with hyperparameters given by hypers.
smartK <- function(KEnvir, X=get(x="X", envir=KEnvir), KType=get(x="KType", envir=KEnvir), hypers=get(x="hypers", envir=KEnvir)) {
  updateKStatus(KEnvir, X, KType, hypers)
  if (!upToDate(KEnvir, "K")) {
    KNoNoise <- smartKNoNoise(KEnvir, X, KType, hypers)
    K <- KNoNoise + diag(nrow=nrow(KNoNoise), x=exp(hypers$logSigmaNSq))
    updateOneMatrix(KEnvir, "K", K)
  } else {
    K <- get(x="K", envir=KEnvir)
  }
  return (K)
}

# Calculate the Cholesky decomposition of the Gram Matrix K, evaluated at the
# points in X with hyperparameters given by hypers.
smartKChol <- function(KEnvir, X=get(x="X", envir=KEnvir), KType=get(x="KType", envir=KEnvir), hypers=get(x="hypers", envir=KEnvir)) {
  updateKStatus(KEnvir, X, KType, hypers)
  if (!upToDate(KEnvir, "KChol")) {
    K <- smartK(KEnvir, X, KType, hypers)
    KChol <- carefulChol(K)
    updateOneMatrix(KEnvir, "KChol", KChol)
  } else {
    KChol <- get(x="KChol", envir=KEnvir)
  }
  return (KChol)
}

# Calculate the Cholesky decomposition of the Gram Matrix K, evaluated at the
# points in X with hyperparameters given by hypers.
smartKInv <- function(KEnvir, X=get(x="X", envir=KEnvir), KType=get(x="KType", envir=KEnvir), hypers=get(x="hypers", envir=KEnvir)) {
  updateKStatus(KEnvir, X, KType, hypers)
  if (!upToDate(KEnvir, "KInv")) {
    KChol <- smartKChol(KEnvir, X, KType, hypers)
    KInv <- chol2inv(KChol)
    updateOneMatrix(KEnvir, "KInv", KInv)
  } else {
    KInv <- get(x="KInv", envir=KEnvir)
  }
  return (KInv)
}

# Calculate the product of the noiseless K with the inverse of the noisy K.
# Makes sense to precompute, because this combination occurs both when finding
# the mean function, and every time you make a draw from the distribution.
smartKnnKInv <- function(KEnvir, X=get(x="X", envir=KEnvir), KType=get(x="KType", envir=KEnvir), hypers=get(x="hypers", envir=KEnvir)) {
  updateKStatus(KEnvir, X, KType, hypers)
  if (!upToDate(KEnvir, "KnnKInv")) {
    KnnKInv <- smartKNoNoise(KEnvir, X, KType, hypers) %*% smartKInv(KEnvir, X, KType, hypers)
    updateOneMatrix(KEnvir, "KnnKInv", KnnKInv)
  } else {
    KnnKInv <- get(x="KnnKInv", envir=KEnvir)
  }
  return (KnnKInv)
}

# Calculate the lower-triangular Cholesky decomposition of the covariance
# matrix, which lets you take draws from the distribution of functions
smartL <- function(KEnvir, X=get(x="X", envir=KEnvir), KType=get(x="KType", envir=KEnvir), hypers=get(x="hypers", envir=KEnvir)) {
  updateKStatus(KEnvir, X, KType, hypers)
  if (!upToDate(KEnvir, "L")) {
    Knn <- smartKNoNoise(KEnvir, X, KType, hypers)
    L <- t(carefulChol(Knn - smartKnnKInv(KEnvir, X, KType, hypers) %*% Knn))
    updateOneMatrix(KEnvir, "L", L)
  } else {
    L <- get(x="L", envir=KEnvir)
  }
  return (L)
}

######################################################################
# OPTIMIZATION FUNCTIONS
#
# These functions make use of the above probability functions, to find the
# optimal values for quantities of interest (best hyperparameters, most
# probable underlying function, etc.)

# Given a noisy dataset (assumed Poisson), and enough hyperparameters to define
# a varying-Ell Gram matrix, this function will return the mean underlying
# function, along with the lower-triangular Cholesky decomposition of its
# covariance matrix, enabling multiple draws of the function to be made.
denoise <- function(X, Y, hypers, takeSqrt=T, KType="SEVaryingEll", bin=1,
  KEnvir=constructKEnvir(X, KType, hypers)
  ) {
  if (bin > 1) {
    nPts <- trunc(length(Y)/bin)*bin
    X <- rowMeans(matrix(X[1:nPts], byrow=T, ncol=bin))
    Y <- rowSums(matrix(Y[1:nPts], byrow=T, ncol=bin))
  }
  if (takeSqrt) Y <- sqrt(Y)
  Knn <- smartKNoNoise(KEnvir=KEnvir, X=X, KType=KType, hypers=hypers)
  KInv <- smartKInv(KEnvir=KEnvir, X=X, KType=KType, hypers=hypers)
  KnnKInv <- Knn %*% KInv
  u <- KnnKInv %*% Y
  Cov <- Knn - KnnKInv %*% t(Knn)
  L <- t(carefulChol(Cov))
  return (list(u=u, L=L, KEnvir=KEnvir))
}

defaultEllRange <- function(X) {
  dX <- abs(diff(X))
  ellMin <- min(dX[which(dX>0)])
  ellMax <- 2*diff(range(X))
  return (c(ellMin,ellMax))
}

GPSEVaryingNoise <- function(X, Y, noiseVariance, ellGrid=T, label=quickTimestamp(), iter=1, meanLogEll, sdLogEll=log(2)/2.0, ellBnds=defaultEllRange(X)) {
  twoSigSq <- 2*sdLogEll^2
  logEllBnds <- log(ellBnds)
  logEll <- mean(logEllBnds)
  sigmaFSq <- diff(range(Y))^2
  params <- c(logEll, log(sigmaFSq))
  names(params) <- c("logEll", "logSigmaFSq")
  KType <- "SEVaryingNoise"
  hypers <- hypersForSEFullResEll(logEll=logEll, sigmaFSq=sigmaFSq, sigmaNSq=noiseVariance)
  KEnvir <- constructKEnvir(X=X, KType=KType, hypers=hypers)

  # Let's test a gridded sequence of ell-values
  if (ellGrid) {
    GC <- 40 # "[G]rid [C]ount"; hardcoded for now
    logEllVals <- seq(from=logEllBnds[1], to=logEllBnds[2], length.out=GC)
    logMLVals <- logEllVals
    diffs <- abs(diff(Y))
    logSigmaFSqMin <- log(min(diffs[which(diffs>0)]))
    logSigmaFSqMax <- log(diff(range(Y)))
    #prior <- function(logEll) return (-((logEll - meanLogEll)^2)/twoSigSq)
    f <- function(logSigmaFSq, hypers, KEnvir) {
      hypers$sigmaFSq <- exp(logSigmaFSq)
      #return (logML(hypers=hypers, KType=KType, X=X, Y=Y, KEnvir=KEnvir) + prior(hypers$logEll))
      return (logML(hypers=hypers, KType=KType, X=X, Y=Y, KEnvir=KEnvir))
    }
    for (i in 1:GC)  {
      hypers$logEll <- logEllVals[i]
      opt <- optimize(f=f, interval=c(logSigmaFSqMin, logSigmaFSqMax), maximum=T, tol=0.001,
        # Additional parameters to pass to 'f'
        hypers=hypers, KEnvir=KEnvir)
      logMLVals[i] <- opt$objective
    }
    png(filename=sprintf("TEMP_FIGS/lastGriddedEll_%s_%05d.png", label, iter), width=800, height=800)
    plot(type="b", log="x", exp(logEllVals), logMLVals)
    dev.off()
    # Look for local optima
  }

  # Now, optimize ell and sigmaF simultaneously
  opt <- optim(par=params, fn=logMLSEVaryingNoise, gr=gradLogMLForSEVaryingNoise, method="L-BFGS-B",
    control=list(fnscale=-1, trace=0, maxit=400),
    lower=c(logEllBnds[1], -Inf),
    upper=c(logEllBnds[2], +Inf),
    # Extra parameters needed for fn and gr:
    X=X, Y=Y, KEnvir=KEnvir, noise=noiseVariance)
  hypers <- hypersForSEFullResEll(logEll=opt$par["logEll"], sigmaFSq=exp(opt$par["logSigmaFSq"]), sigmaNSq=noiseVariance)
  return (list(hypers=hypers, logML=opt$value))
}

GPFixedEll <- function(X, Y, ell) {
  constParams <- log(ell)
  names(constParams) <- "logEll"
  params <- log(c(diff(range(Y))^2, var(Y)))
  names(params) <- c("logSigmaFSq", "logSigmaNSq")
  hypers <- hypersFromParamsSE(params, constParams)
  KType <- "SE"
  KEnvir <- constructKEnvir(X=X, KType=KType, hypers=hypers)
  opt <- optim(par=params, fn=logMLSE, gr=gradLogMLForSE,
    control=list(fnscale=-1, trace=0, maxit=400),
    # Extra parameters needed for fn and gr:
    X=X, Y=Y, constParams=constParams, KType=KType, KEnvir=KEnvir)
  return (list(hypers=c(constParams, opt$par), KEnvir=KEnvir))
}

bestSigmaFSq <- function(X, Yg, ell, sigmaNSq=0.25) {
  # First, let's subsample
  cutoff <- 300
  N <- length(X)
  if (N > cutoff) {
    indices <- seq(from=1, to=N, length.out=cutoff)
    X <- X[indices]
    Yg <- Yg[indices]
  }

  constParams <- log(c(ell, sigmaNSq))
  names(constParams) <- c("logEll", "logSigmaNSq")
  params <- 0 # temp. value, to be overwritten later
  names(params) <- "logSigmaFSq"
  hypers <- hypersFromParamsSE(params, constParams)
  KType <- "SE"
  KEnvir <- constructKEnvir(X=X, KType=KType, hypers=hypers)

  f <- function(sigmaFSq) {
    params <- log(sigmaFSq)
    names(params) <- "logSigmaFSq"
    return (logMLSE(params, constParams, KType, X, Yg, KEnvir))
  }

  sigmaFSqBnds <- c(0.01, 10)*diff(range(Yg))^2
  opt <- optimize(f=f, interval=sigmaFSqBnds, maximum=T, tol=0.0001*diff(sigmaFSqBnds))
  return (opt$maximum)
}

bestSigmaFSqVaryingEll <- function(X, Yg, ell, sigmaNSq=0.25) {
  N <- length(X)
  KType <- "SEFullResEll"
  logEll <- log(ell)
  f <- function(sigmaFSq) {
    hypers <- hypersForSEFullResEll(logEll=logEll, sigmaFSq=sigmaFSq, sigmaNSq=sigmaNSq)
    LML <- logML(hypers=hypers, KType=KType, X=X, Y=Yg)
    return (LML)
  }
  sigmaFSqBnds <- c(0.01, 10)*diff(range(Yg))^2
  opt <- optimize(f=f, interval=sigmaFSqBnds, maximum=T, tol=0.0001*diff(sigmaFSqBnds))
  return (opt$maximum)
}

# Choosing bounds for ell: by default, we don't want to consider lengthscales
# below the pixel resolution, or above the range of the data
bestHypersConstantEllGaussian <- function(X, Yg, sigmaNSq=0.25, ellMin=min(abs(diff(X))), ellMax=10*diff(range(X)), logEllBnds=log(c(ellMin, ellMax))) {
  logSigmaFSqBnds <- log(c(0, 100*diff(range(Yg))^2))

  # Setup the parameters and environment
  params <- c(mean(logEllBnds), log(diff(range(Yg))^2))
  names(params) <- c("logEll", "logSigmaFSq")
  noiseParam <- log(sigmaNSq)
  names(noiseParam) <-  "logSigmaNSq"
  hypers <- hypersFromParamsSE(params=params, constParams=noiseParam)
  KType <- "SE" # We're fitting to a simple SE model here, i.e. constant ell
  KEnvir <- constructKEnvir(X=X, KType=KType, hypers=hypers)

  opt <- optim(par=params, fn=logMLSE, gr=gradLogMLForSE, method="L-BFGS-B",
    lower=c(logEllBnds[1], logSigmaFSqBnds[1]),
    upper=c(logEllBnds[2], logSigmaFSqBnds[2]),
    control=list(fnscale=-1, trace=0, maxit=400),
    # Extra parameters needed for fn and gr:
    X=X, Y=Yg, constParams=noiseParam, KType=KType, KEnvir=KEnvir)

  myParams <- exp(opt$par)
  names(myParams) <- gsub(x=names(myParams), pattern="log(.)", perl=T, replacement="\\L\\1")

  return (list(ell=myParams["ell"], sigmaFSq=myParams["sigmaFSq"], logML=opt$value, KEnvir=KEnvir))
}

# What values for 'ell' and 'sigmaFSq' best describe the Poisson-noised data
# 'Y', evaluated at the points 'X'?
bestHypersConstantEllPoisson <- function(X, Y) {
  return (bestHypersConstantEllGaussian(X=X, Yg=PoissonToGauss(Y), sigmaNSq=0.25))
}

# Fits the given data to a Gaussian Process by maximizing marginal likelihood.
# Returns all local optima for tuples (ell, sigma_f, sigma_n) together with
# log(ML) values
GPGaussianGenericMaxML <- function(X, Y, nPts=20, sigmaNSq=var(Y), constNoise=FALSE, wordy=FALSE, doGrid=T, label="HiThere") {

  logMLWithPlotWrapper <- function(params, constParams=c(), KType="SE", X, Y,
    hypers=hypersFromParamsSE(params, constParams),
    KEnvir=constructKEnvir(X, KType, hypers)) {
    png(filename=sprintf("LatestGoodFit_%s_%s.png", label, quickTimestamp()), width=1000, height=800)
    plot(X, Y, type="l", col="gray")
    points(X, smartKnnKInv(KEnvir=KEnvir) %*% Y, col="black", type="l")
    dev.off()
    return (logMLSE(params=params, constParams=constParams, KType=KType, X=X, Y=Y, hypers=hypers, KEnvir=KEnvir))
  }
  # Setup grid of ell-values to map the global structure (when looking for
  # local optima of ell)
  logBnds <- log(defaultEllRange(X))
  ellVals <- exp(seq(from=logBnds[1], to=logBnds[2], length.out=nPts))
  # Setup the hyperparameters and environment we'll need
  params <- log(diff(range(Y))^2)
  names(params) <- "logSigmaFSq"
  noiseParam <- log(sigmaNSq)
  names(noiseParam) <-  "logSigmaNSq"
  constParams <- 0 # Dummy value, will get overwritten inside the loop
  names(constParams) <- "logEll"
  if (constNoise) {
    constParams <- c(constParams, noiseParam)
  } else {
    params <- c(params, noiseParam)
  }
  hypers <- hypersFromParamsSE(params, constParams)
  KType <- "SE" # We're fitting to a simple SE model here, i.e. constant ell
  KEnvir <- constructKEnvir(X=X, KType=KType, hypers=hypers)
  # Now, optimize at each ell, and plot log(ML)
  logMLVals <- ellVals # Just setting it up to be the same size, will put real data in later
  pngname <- sprintf("ellFunction_%s.png", quickTimestamp())
  if (doGrid) {
    for (iEll in 1:nPts) {
      constParams["logEll"] <- log(ellVals[iEll])
      opt <- optim(par=params, fn=logMLSE, gr=gradLogMLForSE,
        control=list(fnscale=-1, trace=0, maxit=400),
        # Extra parameters needed for fn and gr:
        X=X, Y=Y, constParams=constParams, KType=KType, KEnvir=KEnvir)
      logMLVals[iEll] <- opt$value
      if (wordy) {
        png(filename=pngname, width=800, height=800)
        plot(ellVals[1:iEll], logMLVals[1:iEll], type="b", xlim=c(ellVals[1], ellVals[nPts]), log="x")
        dev.off()
      }
    }
    # Default logic: just take the longest-lengthscale local optimum
    for (i in 2:(nPts-1)) {
      if (logMLVals[i+1] <= logMLVals[i] && logMLVals[i-1] <= logMLVals[i]) {
        boundL <- ellVals[i-1]
        best <- ellVals[i]
        boundR <- ellVals[i+1]
      }
    }
    bestI <- which(logMLVals==max(logMLVals))
    if (bestI == 1 || bestI == nPts) {
      boundL <- ifelse(bestI==1, 0, ellVals[nPts-1])
      best <- ellVals[bestI]
      boundR <- ifelse(bestI==nPts, Inf, ellVals[2])
    }
  } else {
    boundL <- exp(logBnds[1])
    best <- exp(mean(logBnds))
    boundR <- exp(logBnds[2])
  }
  myNames <- c("logEll", names(params))
  params <- c(log(best), params)
  names(params) <- myNames
  constParams <- constParams[-which(names(constParams)=="logEll")]
  opt <- optim(par=params, fn=logMLWithPlotWrapper, gr=gradLogMLForSE, method="L-BFGS-B",
    control=list(fnscale=-1, trace=0, maxit=400),
    lower=c(log(boundL), -Inf, -Inf),
    upper=c(log(boundR), +Inf, +Inf),
    # Extra parameters needed for fn and gr:
    X=X, Y=Y, KType=KType, KEnvir=KEnvir, constParams=constParams)
  cat("Later, this function should return ALL local optima, together with their probabilities.\n")
  return (c(opt$par, constParams))
}

jumpFactor <- function(lengthscale, X, i) {
  Delta <- ifelse(test=(i>1), yes=(X[i] - X[i-1]), no=(X[i+1] - X[i]))
  return(exp(Delta/lengthscale))
}

# Given a sorted vector X, which represents the points where we have data, and
# an index i into X: return a set of indices into X such that:
#  a) between (X[i] - dX) and (X[i] + dX), all points are included, and
#  b) outside this region, points are included with exponentially decaying
#     density.  (Specifically: the distance to the next point is 'a' times the
#     previous distance)
focusRegion <- function(X, i, dX, a=1.1, directions=c(-1,1), ...) {
  included <- array(0, dim=length(X))
  included[i] <- 1
  fillDirection <- function(direction) {
    j <- i + direction
    while (j <= length(X) && j >= 1 && abs(X[j] - X[i]) <= dX) {
      included[j] <<- 1
      j <- j + direction
    }
    # Now, 'j' holds the index of the first point that is too far
    jump <- 0
    k <- 1
    index <- j
    while (index <= length(X) && index >= 1) {
      included[index] <<- 1
      jump <- jump + k
      k <- k * a
      index <- round(j + direction * jump)
    }
  }
  for (dir in directions)
    fillDirection(dir)
  return (which(included == 1))
}

# Return a set of internal and external points.  The internal points are
# sampled with full density inside the chosen region; the external points are
# sampled with exponentially decaying density.
focusedSubset <- function(X, nInt, LExtPts, firstI, lastI=min(length(X), firstI-1+nInt)) {
  internal <- firstI:lastI
  dX <- min(abs(diff(X[internal])))
  a <- jumpFactor(LExtPts*dX/2.0, X, firstI)
  FRL <- FRR <- c()
  if(firstI > 1) FRL <- focusRegion(X, i=firstI-1, dX=0, a=a, directions=-1)
  if(lastI < length(X)) FRR <- focusRegion(X, i=lastI+1, dX=0, a=a, directions=+1)
  allPts <- c(FRL, internal, FRR)
  intIdx <- length(FRL)+(1:length(internal))
  return (list(intPts=internal, extPts=c(FRL,FRR), allPts=allPts, intIdx=intIdx))
}

# Divvy up the data into a set of "focus regions".
focusRegionFamily <- function(X, nInt, La) {
  # First: get a set of focus regions of width 'nInt' that cover the data as
  # densely as possible, without overlapping
  N <- length(X)
  first <- round(seq(from=1, to=N-nInt, by=nInt + ((N %% nInt)/round(N/nInt))))
  # Compute the focus regions
  frList <- list()
  i <- 1
  for (f in first) {
    frList[[i]] <- focusedSubset(X=X, nInt=nInt, LExtPts=La, firstI=f)
    i <- i + 1
  }
  return (frList)
}

# Assign a mean X-position to each focus region
focusRegionXVals <- function(X, frList) {
  xVals <- c()
  for (i in 1:length(frList))
    xVals <- c(xVals, mean(X[frList[[i]]$intPts]))
  return (xVals)
}

optimizeAllFocusRegions <- function (X, Yg, frList) {
  nReg <- length(frList)
  logML <- bestEll <- 0*(1:nReg)
  totalSigmaFSq <- 0
  for (i in 1:nReg) {
    idxs <- frList[[i]]$allPts
    bestHypers <- bestHypersConstantEllGaussian(X[idxs], Yg[idxs])
    totalSigmaFSq <- totalSigmaFSq + bestHypers$sigmaFSq
    bestEll[i] <- bestHypers$ell
    logML[i] <- bestHypers$logML
  }
  return (list(ell=bestEll, sigmaFSq=totalSigmaFSq/nReg, logML=logML))
}

# We want to find bounds for ell which enclose the value having the highest
# marginal likelihood (using a very heuristic approach).  Strategy: start at a
# small value (smaller ell-values are generally preferred), then work our way
# up in a grid until the ML decreases.  The maximum falls somewhere between
# this last value, and the value two before.
findBestEllBounds <- function(Y, X, ell0, dEll=NULL) {
  if (is.null(dEll)) dEll <- ell0
  params <- log(var(Y))
  names(params) <- "logSigmaFSq"
  constParams <- log(c(ell0, 0.25))
  names(constParams) <- c("logEll", "logSigmaNSq")
  hypers <- hypersFromParamsSE(params, constParams)
  KType <- "SE" # We're fitting to a simple SE model here, i.e. constant ell
  KEnvir <- constructKEnvir(X=X, KType=KType, hypers=hypers)

  # Find first ML; make sure it's increasing
  ells <- ell0
  opt <- optim(par=params, fn=logMLSE, gr=gradLogMLForSE,
    control=list(fnscale=-1, trace=0, maxit=400),
    # Extra parameters needed for fn and gr:
    X=X, Y=Y, constParams=constParams, KType=KType, KEnvir=KEnvir)
  LMLs <- opt$value
  # Note: switched roles of constParams and params for this calc
  grad0 <- gradLogMLForSE(constParams, params, KEnvir, KType, X, Y)
  if (grad0["logEll"] < 0)
    stop("Please pick a lower starting point for ell.\n")

  repeat {
    N <- length(ells)
    ells <- c(ells, ells[N] + dEll)
    constParams["logEll"] <- log(ells[N+1])
    opt <- optim(par=opt$par, fn=logMLSE, gr=gradLogMLForSE,
      control=list(fnscale=-1, trace=0, maxit=400),
      # Extra parameters needed for fn and gr:
      X=X, Y=Y, constParams=constParams, KType=KType, KEnvir=KEnvir)
    LMLs <- c(LMLs, opt$value)
    if (LMLs[N] > LMLs[N+1])
      break
  }

  return (c(ells[max(1, N-1)], ells[N+1]))
}

# Assuming the datapoints Y(X) are Poisson-noised for some underlying true
# signal, this function returns the most probable parameters (ell, sigmaF) in
# the region of width dX around X[i]
focusRegionFit <- function(Y, X, i, dX, ell=NULL, sigFSq=NULL, La=NULL, b=c(5.4,13), ...) {
  fudgeFactor <- 1000
  if (is.null(La)) La <- dX/10 # Arbitrary constant in case La is unspecified; if found to be poor, it will get optimized anyway
  lower <- upper <- c(0,0)
  choosePoints <- function() {
    a <<- jumpFactor(La, X, i)
    frPts <<- focusRegion(X=X, i=i, dX=dX, a=a, ...)
    sqrtY <<- sqrt(Y[frPts]) # If Y is Poisson, sqrt(Y) is roughly Gaussian(sig^2 = 1/4)
    lower[] <<- c(log(b[1]*La), -Inf)
    upper[] <<- c(log(b[2]*La), log(fudgeFactor*var(sqrtY)))
  }
  choosePoints()
  if (is.null(ell)) ell <- dX
  if (is.null(sigFSq)) sigFSq <- var(sqrtY)
  params <- log(c(ell, sigFSq))
  names(params) <- c("logEll", "logSigmaFSq")
  constParams <- log(0.25)
  names(constParams) <- "logSigmaNSq"
  names(lower) <- names(upper) <- names(params)

  hypers <- hypersFromParamsSE(params, constParams)
  KType <- "SE" # We're fitting to a simple SE model here, i.e. constant ell
  KEnvir <- constructKEnvir(X=X, KType=KType, hypers=hypers)
  repeat {
    opt <- optim(par=params, fn=logMLSE, gr=gradLogMLForSE,
      control=list(fnscale=-1, trace=0, maxit=400), lower=lower, upper=upper, method="L-BFGS-B",
      # Extra parameters needed for fn and gr:
      X=X[frPts], Y=sqrtY, constParams=constParams, KType=KType, KEnvir=KEnvir)
    epsilon <- 1e-4*(b[2]-b[1])*La/exp(c(lower[1], upper[1]))
    names(epsilon) <- c("L", "U")
    # Too close to the upper limit?  Rescale and recompute...
    LastLa <- La
    if (opt$par["logEll"] > upper[1] - epsilon["U"]) {
      La <- 2*b[2]*La/(b[1]+b[2])
    }
    # Too close to the lower limit?  Rescale and recompute...
    if (opt$par["logEll"] < lower[1] + epsilon["L"]) {
      La <- 2*b[1]*La/(b[1]+b[2])
    }
    # Also: we're suspicious when sigmaF is close to its limit.
    if (opt$par["logSigmaFSq"] > upper[2] - 1e-2) {
      fudgeFactor <- fudgeFactor / 1.5
    }
    if (La == LastLa) break
    cat(sprintf("For X=%.2f, had to change La from %.3f to %.3f.\n", X[i], LastLa, La))
    choosePoints()
    # NOTE: If I have bugs later, it's probably because I didn't keep track of
    # which scales I've tried, in the hope that it won't be necessary.  i.e. My
    # procedure for choosing a new La *definitely* needs some work.
  }

  # "sum" looks a bit weird below; the only reason I'm using it is to get rid
  # of the distracting "names" attribute
  bestParams <- list(ell=sum(exp(opt$par["logEll"])), sigmaFSq=sum(exp(opt$par["logSigmaFSq"])), La=La)
  return (bestParams)
}

XSamplePoints <- function(X, dX, densityFactor=1, ...) {
  N <- length(X)
  nPts <- floor(densityFactor*(X[N]-X[1])/dX)
  XVals <- seq(from=X[1], to=X[N], length.out=nPts)
  return (findInterval(XVals, X))
}

localHyperSamples <- function(Y, X, dX, wordy=FALSE, ...) {
  fname <- sprintf("lastHypers_%s", quickTimestamp())
  Xi <- XSamplePoints(X, dX, ...)
  La <- NULL
  Ni <- length(Xi)
  hypers <- matrix(NA, nrow=Ni, ncol=3, dimnames=list(NULL, c("X", "ell", "sigmaFSq")))
  for (ii in 1:Ni) {
    i <- Xi[ii]
    f <- focusRegionFit(Y, X, i, dX, La=La, ...)
    hypers[ii,] <- c(X[i], f$ell, f$sigmaFSq)
    if (wordy)
      outputForGnuplot(hypers, fname)
    La <- f$La
  }
  return (hypers)
}

hyperLists <- function(Y, X, dXVals, ...) {
  nDX <- length(dXVals)
  hList <- list()
  for (iDX in 1:nDX) {
    dX <- dXVals[iDX]
    newlist <- list(localHyperSamples(Y=Y, X=X, dX=dX, ...))
    names(newlist) <- dX
    hList <- c(hList, newlist)
  }
  return (hList)
}

visualizeHyperLists <- function(hList) {
  xMin <- 1e18
  yMax <- xMax <- -xMin
  N <- length(hList)
  for (i in 1:N) {
    xMin <- min(xMin, hList[[i]][,"X"])
    xMax <- max(xMax, hList[[i]][,"X"])
    yMax <- max(yMax, hList[[i]][,"ell"])
  }
  plot(NULL, xlim=c(xMin,xMax), ylim=c(0,yMax), main="ell(Q) for different dX", xlab="X", ylab="ell")
  for (i in 1:N) {
    points(hList[[i]][,"X"], hList[[i]][,"ell"], type="b", col=i, pch=16)
  }
  legend(x="topleft", legend=sprintf("dX = %s", names(hList)), col=1:N, pch=rep(16,N))
}

# Quick-and-dirty function to re-do the optimization of parameters for a given
# set of locally-sampled hyperparameter values.  Reason I'm writing this: the
# GP had previously been based on a linear interpolation of ell(Q), but I need
# a log-based interpretation to ensure it's always positive.
reOptimizeParams <- function(aggHypers, sigmaNSq=0.25) {
  if (length(which(colnames(aggHypers)=="logEll")) < 1)
    aggHypers <- cbind(aggHypers, logEll=log(aggHypers[,"ell"]))
  params <- GPGaussianGenericMaxML(X=aggHypers[,"X"], Y=aggHypers[,"logEll"])
  return (list(samplePoints=aggHypers, params=params, logSigmaNSq=log(sigmaNSq)))
}

# Given noisy data Y evaluated at points X, subject to the hyperparameters
# (i.e. ell and sigma_f) in hyperLists, what is the mean denoised function?  If
# a K environment is supplied, with precomputed matrices, the computations can
# be sped up considerably.
#   Returns: underlying function u, and environment KEnvir.
meanDenoisedFunction <- function(X, Y, hypers, KType="SEVaryingEll",
  KEnvir=constructKEnvir(X=X, KType=KType, hypers=hypers), ...)
{
  return (list(u=(smartKnnKInv(KEnvir, X, KType, hypers) %*% sqrt(Y))^2, KEnvir=KEnvir))
}

randomDenoisedFunction <- function(X, Y, hypers, KType="SEVaryingEll",
  rand=rnorm(n=length(X)),
  KEnvir=constructKEnvir(X=X, KType=KType, hypers=hypers), ...)
{
  mu <- meanDenoisedFunction(X,Y,hypers,KType,KEnvir)$u
  draw <- (sqrt(mu) + smartL(KEnvir, X, KType, hypers) %*% rand)^2
  return (list(u=draw, KEnvir=KEnvir))
}

# Return the optimal hyperparameters where the marginal likelihood is based on
# ribbon slope, and the prior is based on hypocrisy.
optimalHypersFocusRegionsHypocrisy <- function(X, Yg, frList, tauSq) {
  # Fitness function
  frX <- focusRegionXVals(X=X, frList=frList)
  f <- function(logEll, tauSq, X, Yg, frList, frX) {
    names(logEll) <- rep("logEll", length(logEll))
    LML <- 0
    for (Ri in 1:length(frList)) {
      idxs <- frList[[Ri]]$allPts
      LML <- LML + logMLSE(params=logEll[Ri], constParams=constParams, KType=KType,
        X=X[idxs], Y=Yg[idxs])
    }
    hyp <- continuousHypocrisy(X=frX, ell=exp(logEll), tauSq=tauSq)
    return (LML - hyp)
  }
  # Gradient of fitness function
  g <- function(logEll, tauSq, X, Yg, frList, frX) {
    # Start out with the negative gradient of hypocrisy (want grad w.r.t.
    # log(ell), so premultiply through by ell)
    names(logEll) <- rep("logEll", length(logEll))
    grad <- - exp(logEll) * numericalHypocrisyGradient(frX, exp(logEll), tauSq)
    KEnvir <- constructKEnvir(X, KType, hypers=list())
    for (Ri in 1:length(frList)) {
      idxs <- frList[[Ri]]$allPts
      grad[Ri] <- grad[Ri] + gradLogMLForSE(params=logEll[Ri], constParams=constParams,
        KEnvir=KEnvir, KType=KType, X=X[idxs], Y=Yg[idxs])
    }
    return (grad)
  }
  # Start each point at its optimal location for that focus region
  OAFR <- optimizeAllFocusRegions(X=X, Yg=Yg, frList=frList)
  KType <- "SE"
  startingEll <- OAFR$ell
  logEll <- log(startingEll)
  constParams <- log(c(OAFR$sigmaFSq, 0.25))
  names(constParams) <- c("logSigmaFSq", "logSigmaNSq")
  logEllBnds <- range(logEll)

  # Find and return optimal ell(Q)
  opt <- optim(par=logEll, fn=f, gr=g, method="L-BFGS-B",
    lower=logEllBnds[1], upper=logEllBnds[2],
    control=list(fnscale=-1, trace=0, maxit=400),
    # Extra parameters needed for fn and gr:
    X=X, Yg=Yg, tauSq=tauSq, frList=frList, frX=frX)
  fullResLogEll <- spline(x=frX, y=opt$par, xout=X)$y
  return (hypersForSEFullResEll(logEll=fullResLogEll, sigmaFSq=OAFR$sigmaFSq))
}

######################################################################
# EXPLORATORY FUNCTIONS

# Fit a full-resolution subset of the data with a constant-ell GP.  Then,
# consider it as a varying-ell GP, and calculate the gradient of the marginal
# likelihood w.r.t. each ell-value.
localEllFullRes <- function(X, Y, subrange=(1:length(X)), True=NULL, gradStopRatio=1e-3, fNameLabel="fullResEllOfQ", ellMax=diff(range(X)), SigFactor=1, maxIter=100) {
  KType <- "SEFullResEll"
  X <- X[subrange]
  Y <- sqrt(Y[subrange])
  N <- length(subrange)
  params <- GPGaussianGenericMaxML(X, Y, sigmaNSq=0.25, constNoise=T)
  sigmaFSq <- sum(exp(params["logSigmaFSq"]))
  logEll <- rep(sum(params["logEll"]), N)
  ell <- exp(logEll)
  hypers <- hypersForSEFullResEll(logEll, sigmaFSq)
  KEnvir <- constructKEnvir(X=X, KType=KType, hypers=hypers)
  iter <- 0
  legendInfo <- data.frame(
    lty=rep(1,6),
    legend=c("ell(Q)", "grad[ell(Q)]", "grad[ell(Q)] (smoothed)", "Noisy data", "True function", "Bayesian-denoised function"),
    pch=c(-1,1,rep(-1,4)),
    col=c("black", "gray", "blue", "gray", "black", "red"),
    lwd=c(2,1,2,1,2,2))
  grad <- gradLogMLForSEFullResEll(hypers, KType=KType, X, Y, KEnvir=KEnvir)
  gradEll <- quickGaussianBlur(Y=grad, X=X, Sigma=ell*SigFactor)
  gScale <- ellMax * 0.2
  gFactor <- gScale / max(abs(grad))
  gFactor2 <- gScale / max(abs(gradEll))
  uScale <- ellMax * 0.6
  uFactor <- uScale / max(Y)^2
  gMag <- sqrt(gradEll %*% gradEll)
  gMagStop <- gMag * gradStopRatio
  baseFName <- sprintf("%s_%s", fNameLabel, quickTimestamp())

  # Evaluate log(ML) for a given step size
  f <- function(alpha, hypers, ellSteps) {
    hypers$logEll <- hypers$logEll + alpha * ellSteps
    return (logML(hypers=hypers, KType=KType, X=X, Y=Y))
  }
  alphaMax <- 1/max(abs(gradEll)) # (single step shouldn't change ell by more than factor of e)
  LML <- f(0, hypers, gradEll)

  while (gMag > gMagStop) {
    U <- smartKnnKInv(KEnvir, X, KType, hypers) %*% Y
    # Output PNG for this iteration
    png(filename=sprintf("%s_%04d_%s.png", baseFName, iter, quickTimestamp()), width=800, height=800)
    plot(NULL, xlim=range(X), ylim=c(-gScale, 1.4*ellMax+uScale), xlab="Q (1/A)", ylab="Various quantities (scale is for ell(Q) (1/A))", main=sprintf("Finding full-res ell(Q); SigFactor=%.2f; iteration %d",SigFactor,iter))
    points(X, exp(hypers$logEll), type="l", col=legendInfo$col[1], lwd=legendInfo$lwd[1])
    points(X, grad*gFactor, type="b", col=legendInfo$col[2], lwd=legendInfo$lwd[2])
    points(X, gradEll*gFactor2, type="l", col=legendInfo$col[3], lwd=legendInfo$lwd[3])
    points(X, uFactor*Y^2+ellMax, type="l", col=legendInfo$col[4], lwd=legendInfo$lwd[4])
    if (!is.null(True))
      points(X, uFactor*True[subrange]+ellMax, type="l", col=legendInfo$col[5], lwd=legendInfo$lwd[5])
    points(X, uFactor*U^2+ellMax, type="l", col=legendInfo$col[6], lwd=legendInfo$lwd[6])
    abline(h=0, col="gray")
    abline(h=ellMax, col="gray")
    legend(x="topleft", pch=legendInfo$pch, lty=legendInfo$lty, legend=legendInfo$legend, col=legendInfo$col, lwd=legendInfo$lwd)
    dev.off()
    if (isTRUE(iter >= maxIter)) {
      cat (sprintf("Reached maximum of %d iterations; stopping\n", maxIter))
      break
    }
    # Take optimal step, and prepare the next iteration
    alphaMin <- 0
    opt <- optimize(f=f, interval=c(alphaMin, alphaMax), maximum=T, tol=0.001,
      hypers=hypers, ellSteps=gradEll)
    lastLML <- LML
    LML <- opt$objective
    if (LML < lastLML) {
      cat (sprintf("log(ML) *decreased* by %.2e!  Stopping...\n", (lastLML-LML)))
      break
    }
    iter <- iter + 1
    hypers$logEll <- hypers$logEll + opt$maximum * gradEll
    grad <- gradLogMLForSEFullResEll(hypers, KType=KType, X, Y, KEnvir=KEnvir)
    gradEll <- quickGaussianBlur(Y=grad, X=X, Sigma=ell)
    gMag <- sqrt(gradEll %*% gradEll)
    cat(sprintf("log(ML; iter. #%4d) = %.3e;\tgMag = %.2e\n", iter, opt$objective, gMag))
  }
  return (gradEll)
}

exploreDX <- function(X, Y, iVals, dNumPts=20, maxNumPts=400, ...) {
  numPts <- dNumPts
  N <- length(X)
  numIter <- round(maxNumPts/dNumPts)
  dX <- sigFVals <- ellVals <- matrix(NA, byrow=T, nrow=numIter, ncol=length(iVals))
  plotProgress <- function(myMat, mainTitle) {
    xScale <- max(dX[which(!is.na(dX))])
    yScale <- max(myMat[which(!is.na(myMat))])
    plot(dX[,1], myMat[,1], xlab="dX", ylab="", main=mainTitle, type="b", xlim=c(0,xScale), ylim=c(0,yScale))
    if (length(iVals) > 1) {
      for (ii in 2:length(iVals)) {
        points(dX[,ii], myMat[,ii], col=ii, type="b")
      }
    }
  }
  for (iter in 1:numIter) {
    numPts <- iter * dNumPts
    for (ii in 1:length(iVals)) {
      i <- iVals[ii]
      dX[iter, ii] <- (X[min(i+numPts,N)] - X[max(i-numPts,1)])/2
      f <- focusRegionFit(Y=Y, X=X, i=i, dX=dX[iter, ii], ...)
      ellVals[iter, ii] <- f$ell
      sigFVals[iter, ii] <- f$sigmaFSq
      par(mfrow=c(2,1))
      plotProgress(ellVals, "ell")
      plotProgress(sigFVals, "sigmaF squared")
      png(filename="exploreDX.png", width=800, height=800)
      par(mfrow=c(2,1))
      plotProgress(ellVals, "ell")
      plotProgress(sigFVals, "sigmaF squared")
      dev.off()
    }
  }
}

exploreA <- function(X, Y, iVals, aVals, dX=0.25, ...) {
  N <- length(X)
  numA <- length(aVals)
  sigFVals <- ellVals <- matrix(NA, byrow=T, nrow=numA, ncol=length(iVals))
  plotProgress <- function(myMat, mainTitle) {
    xScale <- c(min(aVals), max(aVals))
    yScale <- max(myMat[which(!is.na(myMat))])
    plot(aVals, myMat[,1], xlab="a", ylab="", main=mainTitle, type="b", xlim=xScale, ylim=c(0,yScale))
    if (length(iVals) > 1) {
      for (ii in 2:length(iVals)) {
        points(aVals, myMat[,ii], col=ii, type="b")
      }
    }
  }
  for (ai in 1:length(aVals)) {
    a <- aVals[ai]
    for (ii in 1:length(iVals)) {
      i <- iVals[ii]
      f <- focusRegionFit(Y=Y, X=X, i=i, dX=dX, a=a, ...)
      ellVals[ai, ii] <- f$ell
      sigFVals[ai, ii] <- f$sigmaFSq
      png(filename="exploreA.png", width=800, height=800)
      par(mfrow=c(2,1))
      plotProgress(ellVals, "ell")
      plotProgress(sigFVals, "sigmaF squared")
      dev.off()
      par(mfrow=c(1,1))
    }
  }
}

searchUniversalBounds <- function(X, Y, iVals, LaVals, dX=0.25, datasetDescription, ...) {
  N <- length(X)
  numLa <- length(LaVals)
  numI <- length(iVals)
  ellVals <<- matrix(NA, byrow=T, nrow=numLa, ncol=numI, dimnames=list(round(LaVals,3), sprintf("X=%.2f", X[iVals])))
  TS <- quickTimestamp()

  plotProgress <- function() {
    png(filename=sprintf("exploreLa_%s.png", TS), width=800, height=800)
    xScale <- c(LaVals[1], LaVals[numLa])
    yScale <- c(0, min(2.5, max(ellVals[which(!is.na(ellVals))])))
    # Setup the plot
    plot(NULL, main=sprintf("Looking for universal bounds; %s", datasetDescription), xlim=xScale, ylim=yScale, xlab="Attenuation length L_a (1/A)", ylab="Measured correlation length ell (1/A)")
    # Plot a few guidance lines
    for (b in c(0.25, 0.5, 1.0, 2.0, 4.0)) abline(a=0, b=b, col="gray")
    # Plot the curves-so-far
    for (ii in 1:numI)
      points(LaVals, ellVals[,ii], col=ii, type="b")
    # Add a legend
    legend("topright", pch=rep(1, numI), col=1:numI, legend=sprintf("X = %.2f", X[iVals]))
    dev.off()
  }

  for (Lai in 1:length(LaVals)) {
    La <- LaVals[Lai]
    for (ii in 1:length(iVals)) {
      i <- iVals[ii]
      Delta <- ifelse(test=(i>1), yes=(X[i] - X[i-1]), no=(X[i+1] - X[i]))
      a <- exp(Delta/La)
      ellVals[Lai, ii] <<- focusRegionFit(Y=Y, X=X, i=i, dX=dX, a=a, ...)$ell
      plotProgress()
    }
  }
}

# Assume the data 'Y', from the points 'X', have Gaussian noise with variance
# 'sigmaNSq'.  And, assume they're described by a GP with known varying
# 'ell(X)'.  This function returns an environment whose hyperparameters
KEnvirBestFullResEll <- function(X, Y, ell, sigmaNSq = 0.25) {
  sigmaFSqBnds <- c(0, diff(range(Y))^2)
  hypers <- hypersForSEFullResEll(log(ell), mean(sigmaFSqBnds), sigmaNSq=sigmaNSq)
  KType <- "SEFullResEll"
  KEnvir <- constructKEnvir(X, KType, hypers)

  f <- function(sigmaFSq) {
    hypers$sigmaFSq <- sigmaFSq
    return (logML(hypers=hypers, KType=KType, X=X, Y=Y, KEnvir=KEnvir))
  }

  opt <- optimize(f=f, interval=sigmaFSqBnds, maximum=T, tol=0.001*sigmaFSqBnds[2])
  return (KEnvir)
}

# Returns a matrix whose (i,j) component is the derivative of Knn[i,j] w.r.t.
# the logarithm of ell[j].  i.e. each column of this matrix is the set of
# derivatives for a single parameter.
columnwiseKDerivsSEFullResEll <- function(Knn, X, ell) {
  N <- length(X)
  ell2 <- ell^2
  KDerivs <- Knn # Copy structure
  for (j in 1:N)
    KDerivs[,j] <- Knn[,j] * ((ell2 - ell2[j])/(2*(ell2 + ell2[j])) + 2*((X - X[j])^2*ell2[j])/((ell2 + ell2[j])^2))
  return (KDerivs)
}
gradientVariance <- function(X, Y, KEnvir=KEnvir, KType="SEFullResEll", hypers, idx=1:length(X)) {
  eta <- columnwiseKDerivsSEFullResEll(Knn=smartKNoNoise(KEnvir=KEnvir, X=X, KType=KType, hypers=hypers), X=X, ell=exp(hypers$logEll)) * (1.0 - 0.5*diag(length(X)))
  U <- Y # Approximation
  KInv <- smartKInv(KEnvir=KEnvir, X=X, KType=KType, hypers=hypers)
  UUT <- U %*% t(U)
  gVar <- U[idx] # Copy structure
  for (i in 1:length(idx)) {
    j <- idx[i]
    B <- KInv[,j] %*% t(eta[,j]) %*% KInv
    UUTB <- UUT %*% B
    S <- sum(diag(B)) + UUTB + B %*% UUT + sum(diag(UUTB))
    gVar[i] <- 0.25 * (KInv[j,] %*% S %*% KInv %*% eta[,j])
  }
  return (gVar)
}

scanFocusedSubsets <- function(X, Y, nInt, LExtFactor=0.1, stepFactor=0.25) {
  N <- length(X)
  La <- LExtFactor*nInt
  first <- round(seq(from=1, to=N-nInt, by=stepFactor*nInt))
  for (f in first) {
    FS <- focusedSubset(X, nInt, La, f)
    plotTitle <- sprintf("Focused subset: n=%d points, decay constant %d points", nInt, La)
    png(filename=sprintf("focusRegionXray_%05d.png", f), width=800, height=800)
    plot(xlim=range(X), ylim=range(c(0,Y)), main=plotTitle, xlab="Q(1/A)", ylab="I (counts)",
      X[FS$intPts], Y[FS$intPts], type="l")
    points(X[FS$extPts], Y[FS$extPts], type="p")
    legend(x="topright", pch=c(-1,1), legend=c("Internal points", "External points"), lty=c(1,-1))
    dev.off()
  }
}

testEffectOfEll <- function(ellVals=0.005*(2:14), X, trueData, subrange=6000:6300, TS=quickTimestamp(), nDatasets=100) {
  setScale <- function(R1, R2=R1) {
    f <<- 1
    buff <<- 1
    yRange <<- R2*c(-1, 1+f+buff)
  }

  La <- length(subrange)/7.0
  LaStr <- pointToP(La, 4)
  FS <- focusedSubset(X=X, nInt=length(subrange), LExtPts=La, firstI=subrange[1])
  dataRange <- range(trueData[FS$intPts])
  XInt <- X[FS$intPts]
  AP <- FS$allPts
  XAll <- X[AP]
  N <- length(XAll)

  # Generate the datasets
  noisy <- matrix(rpois(n=N*nDatasets, lambda=trueData[AP]), nrow=nDatasets,
    ncol=length(XAll), byrow=T)

  grad <- matrix(0, nrow=nDatasets, ncol=length(XInt))
  for (ell in ellVals) {
    logEll <- log(ell)
    label <- sprintf("ell-%s_%s", pointToP(ell, 3), TS)
    # Calculate gradient w.r.t. each of them
    hypers <- hypersForSEFullResEll(logEll=rep(logEll,N), sigmaFSq=2^2)
    KEnvir <- constructKEnvir(X=XAll, KType="SEFullResEll", hypers=hypers)
    for (i in 1:nDatasets)
      grad[i,] <- gradLogMLForSEFullResEll(hypers=hypers, X=XAll, Y=sqrt(noisy[i,]), KEnvir=KEnvir, indices=FS$intIdx)
    mu <- apply(grad, 2, mean)
    sigma <- apply(grad, 2, sd)
    Var <- sigma^2

    # Output SiZer diagnostic for each
    hMin <- min(diff(XInt))
    hMax <- ell
    hVals <- seq(from=hMin, to=hMax, length.out=21)
    for (i in 1:nDatasets) {
      png(filename=sprintf("SiZer_%s_%05d.png", label, i), width=800, height=800)
      plot.SiZer(SiZer(XInt, h=hVals, grad[i,], x.grid=XInt[seq(from=1, to=length(XInt), by=3)]))
      dev.off()
    }

    R2 <- max(ellVals)*1.1
    # Plot the mean +- sigma plot
    R1 <- 20; varScale <- R2/R1
    setScale(R1=R1, R2=R2)
    png(filename=sprintf("sweepOutEll_mean_%s.png", label), width=800, height=800)
    plot(NULL, xlim=range(XInt), ylim=yRange, main="Mean gradient, with uncertainty")
    abline(h=0, col="black")
    points(XInt, rep(ell, length(XInt)), col="darkblue", type="l", lwd=2)
    points(XInt, varScale*(mu), type="l", col="blue", lwd=2)
    points(XInt, varScale*(mu+sigma), type="l", col="lightblue")
    points(XInt, varScale*(mu-sigma), type="l", col="lightblue")
    scaleFactor <- R2*f/diff(dataRange)
    points(XInt, R2*(1+buff)+(noisy[1,FS$intIdx]-dataRange[1])*scaleFactor, type="l", col="gray")
    points(XInt, R2*(1+buff)+(trueData[FS$intPts]-dataRange[1])*scaleFactor, type="l", col="black", lwd=2)
    dev.off()

    gradientGuesses <- grad
    dX <- mean(diff(XInt))
    myScales <- (1:ceiling(ell/dX))^2
    params <- rep(log(0.25),3)
    names(params) <- c("logEll", "logSigmaFSq", "logSigmaNSq")
    indices <- sample(1:nDatasets, 20)
    for (i in indices) {
      ss1D <- scaleSpaceFeatures1D(X=XInt, Y=grad[i,], tVals=myScales)
      cat(sprintf("guessing dataset %d...", i))
      params <- GPFixedEll(XInt, grad[i,], dX*max(ss1D$scaleMax$sqrtT))$hypers
      # Now, blur it with the requested ell
      hypers <- hypersFromParamsSE(params)
      gradientGuesses[i,] <- smartKnnKInv(KEnvir, XInt, "SE", hypers) %*% grad[i,]
      cat("done!\n")
    }
    png(filename=sprintf("gradientGuesses_%s.png", label), width=800, height=800)
    plot(xlim=range(XInt), ylim=max(abs(c(gradientGuesses, (mu+sigma), (mu-sigma))))*c(-1,1),
      xlab="Q (1/A)", ylab="Gradient", main=sprintf("Gradient guesses; ell=%.3f", ell), NULL)
    for (i in indices)
      points(type="l", col=which(i==indices), XInt, gradientGuesses[i,])
    points(type="l", col="black", lwd=2, XInt, mu)
    points(type="l", col="lightblue", lwd=1, XInt, mu + sigma)
    points(type="l", col="lightblue", lwd=1, XInt, mu - sigma)
    dev.off()

    indices <- round(runif(n=20, 1, nDatasets))
    R1 <- 20; varScale <- R2/R1
    setScale(R1=R1, R2=R2)
    #for (i in 1) {
    for (i in 1:length(indices)) {
      idx <- indices[i]
      ss1D <- scaleSpaceFeatures1D(X=XInt, Y=grad[i,], tVals=myScales)
      plotScaleSpaceFeatures1D(ssPts=ss1D, X=XInt, Y=grad[i,], id=idx, ell=ell, baseFName=sprintf("ssFeatures_%s", label))
      #png(filename=sprintf("scaleSpaceTest_%s_%d.png", label, idx), width=800, height=800)
      #plot(NULL, xlim=range(XInt), ylim=yRange, main=sprintf("Gradient, dataset %d, %s", idx, label))
      #abline(h=0, col="black")
      ##points(XInt, rep(ell, length(XInt)), col="darkblue", type="l", lwd=2)
      #points(XInt, varScale*(mu), type="l", col="blue", lwd=2)
      #points(XInt, varScale*(mu+sigma), type="l", col="lightblue")
      #points(XInt, varScale*(mu-sigma), type="l", col="lightblue")
      #points(XInt, varScale*grad[idx,], type="b", col="green")
      #scaleFactor <- R2*f/diff(dataRange)
      #points(XInt, R2*(1+buff)+(noisy[idx,FS$intIdx]-dataRange[1])*scaleFactor, type="l", col="gray")
      #points(XInt, R2*(1+buff)+(trueData[FS$intPts]-dataRange[1])*scaleFactor, type="l", col="black", lwd=2)
      #dev.off()
      #png(filename=sprintf("seeSingleGradientONLY_%s_%d.png", label, idx), width=800, height=800)
      #plot(NULL, xlim=range(XInt), ylim=yRange, main=sprintf("Gradient, dataset %d, %s", idx, label))
      #abline(h=0, col="black")
      ##points(XInt, rep(ell, length(XInt)), col="darkblue", type="l", lwd=2)
      #points(XInt, varScale*grad[idx,], type="b", col="green")
      #scaleFactor <- R2*f/diff(dataRange)
      #points(XInt, R2*(1+buff)+(noisy[idx,FS$intIdx]-dataRange[1])*scaleFactor, type="l", col="gray")
      #dev.off()
      #hypers <- hypersForSEFullResEll(logEll=rep(logEll,length(XInt)), sigmaFSq=2^2)
      #U <- (smartKnnKInv(KEnvir, XInt, "SEFullResEll", hypers) %*% sqrt(noisy[idx,FS$intIdx]))^2
      ##for (n in 2*(0:10)) {
      ##  png(filename=sprintf("grad_%s_blur-%02d.png", label, n), width=800, height=800)
      ##  R <- 40*c(-1,1)
      ##  qG <- quickGaussianBlur(grad[idx,], XInt, constSigma=n*mean(diff(XInt)))
      ##  plot(main=sprintf("Gradient blurred by about %d points, dataset %d, %s", n, idx, label), xlim=range(XInt), ylim=R, xlab="Q (1/A)", ylab="Gradient", NULL)
      ##  for (idx in indices)
      ##    points(type="l", col=idx%%20, XInt, quickGaussianBlur(grad[idx,],XInt,constSigma=n*mean(diff(XInt))))
      ##  points(XInt, (mu), type="l", col="blue", lwd=2)
      ##  points(XInt, (mu+sigma), type="l", col="lightblue")
      ##  points(XInt, (mu-sigma), type="l", col="lightblue")
      ##  #points(XInt, rep(ell, length(XInt)), col="darkblue", type="l", lwd=2)
      ##  abline(h=0)
      ##  dev.off()
      ##}
    }
  }

  # Plot the variance statistics
  R1 <- sqrt(200); varScale <- R2/R1
  setScale(R1=R1, R2=R2)
  png(filename=sprintf("sweepOutEll_variance_%s.png", label), width=800, height=800)
  plot(NULL, xlim=range(XInt), ylim=yRange, main="Standard deviation of gradient", xlab="Q (1/A)", ylab="ell(Q)")
  points(XInt, -sigma*varScale, col="darkgreen", type="l", lwd=2)
  points(XInt, rep(ell, length(XInt)), col="darkblue", type="l", lwd=2)
  scaleFactor <- R2*f/diff(dataRange)
  abline(h=0)
  points(XInt, R2*(1+buff)+(noisy[1,FS$intIdx]-dataRange[1])*scaleFactor, type="l", col="gray")
  points(XInt, R2*(1+buff)+(trueData[FS$intPts]-dataRange[1])*scaleFactor, type="l", col="black", lwd=2)
  dev.off()

}
#}

makeFigureShowingBumpInEll <- function(ell0=30, dX=1.5, w=0.05, weight=0.02, nPts=200, avgNCnts=10, sigmaFSq=8^2, bumpRatio=0.25) {
  oneMinusBR <- 1-bumpRatio
  X <- seq(from=0, to=dX, length.out=nPts)
  XMid <- dX/2
  ell1 <- rep(ell0, nPts)
  ell2 <- ell1 + ell0*weight*exp(-(X-XMid)^2/(2*w^2))/sqrt(2*pi*w^2)
  plotRange <- range(c(X,ell1,ell2))

  # Now, make our "true" function and noisy function
  hypers <- hypersForSEFullResEll(logEll=log(ell2), sigmaFSq=sigmaFSq, sigmaNSq=200)
  KType <- "SEFullResEll"
  KEnvir <- constructKEnvir(X=X, KType=KType, hypers=hypers)
  set.seed(2)
  realData <- GaussToPoisson(rep(avgNCnts, nPts) + smartL(KEnvir, X, KType, hypers) %*% rnorm(n=nPts))
  set.seed(3000)
  noisy <- rpois(n=nPts, lambda=realData)
  plot(main="Effect of ell(Q) fluctuations on structure fitting", xlab="", ylab="Intensity (no. of counts)", X, noisy, type="l", col="gray")
  points(X, realData, type="l")
  legend(x="topleft",
    legend=c("Noisy data", "True function", "Full ell(Q) bump", "25% ell(Q) bump", "No ell(Q) bump"),
    lwd=c(1,1,2,2,2),
    lty=c(1,1,1,1,1),
    col=c("gray", "black", "blue", "purple", "red"),
    pch=c(-1,-1,-1,-1,-1))

  # How does denoising do with each one?
  hypersGood <- hypersForSEFullResEll(logEll=log(ell2), sigmaFSq=sigmaFSq)
  hypersOK <- hypersForSEFullResEll(logEll=log(oneMinusBR*ell1+bumpRatio*ell2), sigmaFSq=sigmaFSq)
  hypersBad <- hypersForSEFullResEll(logEll=log(ell1), sigmaFSq=sigmaFSq)
  noisyGauss <- PoissonToGauss(noisy)
  functionGood <- GaussToPoisson(smartKnnKInv(KEnvir, X, KType, hypersGood) %*% noisyGauss)
  functionOK <- GaussToPoisson(smartKnnKInv(KEnvir, X, KType, hypersOK) %*% noisyGauss)
  functionBad <- GaussToPoisson(smartKnnKInv(KEnvir, X, KType, hypersBad) %*% noisyGauss)
  points(X, functionGood, lwd=2, type="l", col="blue")
  points(X, functionOK  , lwd=2, type="l", col="purple")
  points(X, functionBad , lwd=2, type="l", col="red")

# plot(X, ell2, type="l", col="blue", ylim=plotRange, main="ell(Q) with fluctuation")
# points(X, ell1, type="l", col="red")
# points(X, oneMinusBR*ell1+bumpRatio*ell2, type="l", col="purple")
# legend(x="bottomright",
#   lty=c(1,1),
#   pch=c(-1,-1),
#   col=c("blue", "purple", "red"),
#   legend=c("Ell(Q) with full bump", "Ell(Q) with 25% bump", "Ell(Q) with no bump"))

}

# Find the probability ribbon (log(ML)) w.r.t. ell for the given data.
ellRibbon <- function(X, Yg, ellVals) {
}

# Return a grid of log(ML) values, representing probability ribbons for each
# focus region
focusRegionRibbons <- function(X, Yg, frList, nGridPts=30, ellCap=99999) {
  OAFR <- optimizeAllFocusRegions(X, Yg, frList)
  logEllBnds <- log(range(OAFR$ell))
  logEllVals <- seq(from=logEllBnds[1], to=min(log(ellCap), logEllBnds[2]), length.out=nGridPts)
  nReg <- length(frList)
  ribbons <- matrix(0, nrow=nGridPts, ncol=nReg)
  cat(quickTimestamp(), "\n")
  for (frI in 1:nReg) {
    idxs <- frList[[frI]]$allPts
    for (ellI in 1:nGridPts) {
      hypers <- hypersForSE(logEll=logEllVals[ellI], sigmaFSq=OAFR$sigmaFSq)
      ribbons[ellI, frI] <- logML(hypers, KType="SE", X=X[idxs], Y=Yg[idxs]) - OAFR$logML[frI]
    }
  }

  # Find local maxima above zero, and correct the scale
  logMLOffset <- OAFR$logML
  bestEll <- OAFR$ell
  for (badXI in unique(which(ribbons > 0, arr.ind=T)[,"col"])) {
    idxs <- frList[[frI]]$allPts
    badXEllI <- which(ribbons[,badXI]==max(ribbons[,badXI]))
    # Could be a problem with the following if we're at one of the endpoints...
    localBnds <- exp(c(logEllVals[badXEllI-1], logEllVals[badXEllI+1]))
    BH <- bestHypersConstantEllGaussian(X=X[idxs], Yg=Yg[idxs], ellMin=localBnds[1], ellMax=localBnds[2])
    ribbons[,badXI] <- ribbons[,badXI] + OAFR$logML[badXI] - BH$logML
    bestEll[badXI] <- BH$ell
    logMLOffset[badXI] <- BH$logML
  }

  cat(quickTimestamp(), "\n")
  return (list(ribbons=ribbons, logMLOffset=logMLOffset,
      X=focusRegionXVals(X=X, frList=frList), bestEll=bestEll, ell=exp(logEllVals)))
}

# Assume the data Yg(X) is divvied up into focus regions as in 'frList', and
# each is optimized with constant-ell: which focus region has the highest ell?
# Return that ell.
maxFocusRegionLengthscale <- function(X, Yg, frList) {
  # Just some setup code
  nReg <- length(frList)
  KType <- "SE"
  gradient <- function(idxs, hypers, KEnvir) {
    params <- hypers$logEll
    names(params) <- "logEll"
    constParams <- c(hypers$logSigmaFSq,log(0.25))
    names(constParams) <- c("logSigmaFSq", "logSigmaNSq")
    return (gradLogMLForSE(params=params, constParams=constParams, KType=KType, KEnvir=KEnvir, X=X[idxs], Y=Yg[idxs]))
  }
  # Find optimal lengthscale for first focus region
  idx <- frList[[1]]$allPts
  bestHypers <- bestHypersConstantEllGaussian(X[idx], Yg[idx])
  hypers <- hypersForSE(ell=bestHypers$ell, sigmaFSq=bestHypers$sigmaFSq)
  KEnvir <- constructKEnvir(X=X[idx], KType=KType, hypers=hypers)
  # Now, check all the other focus regions
  for (i in nReg:2) {
    # Skip to next, unless gradient is positive (of course, we need the best
    # sigmaFSq to evaluate the gradient)
    idxs <- frList[[i]]$allPts
    hypers$logSigmaFSq <- log(bestSigmaFSq(X[idxs], Yg[idxs], exp(hypers$logEll)))
    GLMLFSE <- gradient(idxs, hypers=hypers, KEnvir=KEnvir)
    if (GLMLFSE > 0) {
      # Optimize ell (with LOWER LIMIT) and sigmaFSq simultaneously
      newBest <- bestHypersConstantEllGaussian(X=X[idxs], Yg=Yg[idxs], ellMin=exp(hypers$logEll), ellMax=10*diff(range(X)))
      hypers$logEll <- log(newBest$ell)
      hypers$logSigmaFSq <- log(newBest$sigmaFSq)
    }
  }
  return (exp(hypers$logEll))
}

# Take Poisson-noised data 'Y' at points 'X', and find a broad-scale ell(Q)
# which describes it.  The goal is for ell(Q) to have the full resolution of
# 'X', and to be able to discover features at any size (i.e. not limited by the
# bandwidth of a Gaussian basis or something).
testNonHypocriticallEll <- function(X, Y, nInt, La, label="test", logStepCap=0.2, tauSq=1e-6) {
  Yg <- PoissonToGauss(Y)
  N <- length(X)
  TS <- quickTimestamp()
  KType <- "SE"

  f <- function(alpha, dLogEll) {
    #cat(sprintf("\tTrying alpha=%.2e starting at %s; ", alpha, quickTimestamp()))
    newLogEll <- logEll + dLogEll * alpha
    names(newLogEll) <- names(logEll)
    LML <- 0
    for (Ri in 1:nReg) {
      idxs <- frList[[Ri]]$allPts
      LML <- LML + logMLSE(params=newLogEll[Ri], constParams=constParams, KType=KType,
        X=X[idxs], Y=Yg[idxs], KEnvir=constructKEnvir(X=X[idxs], KType=KType, hypers=list()))
    }
    hyp <- hypocrisyAndGradient(frX, exp(newLogEll), tauSq)$hyp
    #cat(sprintf("getting score=%.2e (log(ML)=%.2e, hypocrisy=%.2e) at %s\n",
    #   LML-hyp, LML, hyp, quickTimestamp()))
    lastLML <<- LML
    lastHyp <<- hyp
    return (LML - hyp)
  }

  # Some setup code: choose focus regions, find highest ell
  frList <- focusRegionFamily(X=X, nInt=nInt, La=La)
  frX <- focusRegionXVals(X=X, frList=frList)
  nReg <- length(frList)
  #ell <- maxFocusRegionLengthscale(X=X, Yg=Yg, frList=frList)
  ell <- 1
  # Set scale for plots
  stepScale <- 0.3
  yMin <- - stepScale * ell
  yMax <- 1.5 * ell
  # Setup full-res ell, gradient vector, and smoothed-gradient vector
  smGrad <- grad <- logEll <- rep(log(ell), nReg)# + 0.01 * (1:nReg)
  names(logEll) <- rep("logEll", nReg)
  constParams <- log(c(bestSigmaFSq(X=X, Yg=Yg, ell=ell), 0.25))
  names(constParams) <- c("logSigmaFSq", "logSigmaNSq")
  # Setup environment for optimizing hypers within each focus region
  KEnvir <- constructKEnvir(X, KType, hypers=list())

  gScale <- -1
  almostDone <- FALSE
  lastScore <- -Inf
  for (pass in 1:400) {
    # Find gradient for each focus region ribbon
    for (Ri in 1:nReg) {
      idxs <- frList[[Ri]]$allPts
      grad[Ri] <- gradLogMLForSE(params=logEll[Ri], constParams=constParams,
        KEnvir=KEnvir, KType=KType, X=X[idxs], Y=Yg[idxs])
    }
    # Calculate hypocrisy gradient for each focus region
    hypoc <- - exp(logEll) * numericalHypocrisyGradient(frX, exp(logEll), tauSq)
    smGrad <- hypoc + grad

    # Take an optimal (capped) step
    limit <- logStepCap / max(abs(smGrad))
    cat(sprintf("Pass %3d: score = ", pass))
    opt <- optimize(f=f, interval=c(0, limit), maximum=T, tol=0.01*limit, dLogEll=smGrad)
    while (opt$objective < lastScore)
      opt <- optimize(f=f, interval=c(0, limit), maximum=T, tol=0.01*opt$maximum, dLogEll=smGrad)
    alpha <- opt$maximum
    lastScore <- opt$objective
    cat(sprintf("%9.2f (log(ML) = %9.2f; hypocrisy = %9.2f); max. log change = %.2e\n", opt$objective, lastLML, lastHyp, alpha*max(abs(smGrad))))

    # Output a PNG file
    if (gScale < 0) gScale <- 0.5/mean(abs(smGrad))
    png(filename=sprintf("hypEllPrior_N-%04d_La-%03d_tauSq-%s_%s_%s_p-%03d.png", nInt, La, pointToPSN(tauSq), label, TS, pass), width=640, height=640)
    #par(mfrow=c(2,1))
    oldFullFunction <- spline(method="natural", xout=X, x=frX, y=exp(logEll))
    newFullFunction <- spline(method="natural", xout=X, x=frX, y=exp(logEll + alpha * smGrad))
    plot(main=sprintf("ℓ(Q) as prior: pass %d", pass),
      xlab="Q (1/A)", ylab="ℓ(Q) (1/A)", ylim=c(yMin, yMax), type="l", col="gray",
      oldFullFunction)
    abline(h=ell, col="lightblue")
    points(col="gray", type="p", frX, exp(logEll))
    points(type="l", col="black", newFullFunction)
    points(type="p", col="black", frX, exp(logEll + alpha * smGrad))
    arrows(length=0.05, x0=frX, y0=exp(logEll), y1=exp(logEll)+(hypoc+grad)*gScale, col="blue", lwd=2)
    arrows(length=0.05, x0=frX, y0=exp(logEll), y1=exp(logEll)+grad*gScale, col="green")
    arrows(length=0.05, x0=frX, y0=exp(logEll), y1=exp(logEll)+hypoc*gScale, col="red")
    points(type="l", col="blue", frX, exp(logEll)+(hypoc+grad)*gScale)

    # Update quantities
    logEll <- logEll + alpha * smGrad
    names(logEll) <- rep("logEll", nReg)

#   # Plot actual function and fit
#   plot(main="Function and fit", xlab="Q (1/A)", ylab="Intensity (# of counts)",
#     ylim=range(Y), type="p", col="gray",
#     X, Y)
#   points(X, Fp, type="l", col="gray")
#   Fg <- smartKnnKInv(KEnvir, X, KType, hypers) %*% Yg
#   Fp <- GaussToPoisson(Fg)
#   points(X, Fp, type="l", col="black")
    dev.off()

    # Early exit
    gradientShrunk <- max(abs(smGrad)) * 100 < 0.5/gScale
    if (almostDone && gradientShrunk) break
    almostDone <- gradientShrunk

  }
  cat(quickTimestamp(), "\n")
  ellInterp <- minimumHypocrisyInterpolant(X=frX, ell=exp(logEll), XNew=X)
  cat(quickTimestamp(), "\n")
  return (list(frX=frX, ell=exp(logEll), ellInterp=ellInterp))
}

hypocrisyCurve <- function(frX, ell, X, ellVals) {
  hypCurve <- c()
  for (ellX in ellVals) {
    hypCurve <- c(hypCurve, minusLogHypocrisyNewPoint(ellX, X, ell, frX))
  }
  return (list(ell=ellVals, hyp=hypCurve))
}

logMLFocusRegions <- function(X, Y, frList, logEll, sigmaFSq=lastSigmaFSq, tauSq=1) {
  Yg <- PoissonToGauss(Y)
  KType <- "SE"
  N <- length(X)
  frX <- focusRegionXVals(X=X, frList=frList)
  names(logEll) <- rep("logEll", length(logEll))
  LML <- 0
  constParams <- log(c(sigmaFSq, 0.25))
  names(constParams) <- c("logSigmaFSq", "logSigmaNSq")
  for (Ri in 1:length(frList)) {
    idxs <- frList[[Ri]]$allPts
    LML <- LML + logMLSE(params=logEll[Ri], constParams=constParams, KType=KType,
      X=X[idxs], Y=Yg[idxs])
  }
  hyp <- hypocrisyAndGradient(X=frX, ell=exp(logEll), tauSq=tauSq)$hyp
  hypNormed <- hypocrisyAndGradient(X=frX, ell=exp(logEll), tauSq=1)$hyp
  return (data.frame(score=LML-hyp, tauSq=tauSq, LML=LML, hyp=hyp, hypNormed=hypNormed))
}

effectOfTauSq <- function(X, Y, nInt, La, label="test", tauSqVals, startingEll=NULL, sigmaFSq=NA) {
  Yg <- PoissonToGauss(Y)
  N <- length(X)
  TS <- quickTimestamp()
  KType <- "SE"

  # Some setup code: choose focus regions, find highest ell for each
  frList <- focusRegionFamily(X=X, nInt=nInt, La=La)
  frX <- focusRegionXVals(X=X, frList=frList)
  nReg <- length(frList)
  if (is.null(startingEll)) {
    cat("Finding optimal starting ell...")
    TS1 <- quickTimestamp()
    OAFR <- optimizeAllFocusRegions(X=X, Yg=Yg, frList=frList)
    dev.new()
    plot(frX, OAFR$ell, type="p", ylim=c(0, max(OAFR$ell)))
    cat(sprintf("...done!  From %s to %s.\n", TS1, quickTimestamp()))
    lastStartingEll <<- startingEll <- OAFR$ell
    lastSigmaFSq <<- sigmaFSq <- OAFR$sigmaFSq
  }
  logEll <- log(startingEll)
  constParams <- log(c(sigmaFSq, 0.25))
  names(constParams) <- c("logSigmaFSq", "logSigmaNSq")

  # Fitness function: log(ML) - hypocrisy
  f <- function(logEll, tauSq, X, Yg) {
    names(logEll) <- rep("logEll", length(logEll))
    LML <- 0
    for (Ri in 1:nReg) {
      idxs <- frList[[Ri]]$allPts
      LML <- LML + logMLSE(params=logEll[Ri], constParams=constParams, KType=KType,
        X=X[idxs], Y=Yg[idxs])
    }
    hyp <- hypocrisyAndGradient(X=frX, ell=exp(logEll), tauSq=tauSq)$hyp
    return (LML - hyp)
  }
  # Gradient of fitness function
  g <- function(logEll, tauSq, X, Yg) {
    # Start out with the negative gradient of hypocrisy (want grad w.r.t.
    # log(ell), so premultiply through by ell)
    names(logEll) <- rep("logEll", length(logEll))
    grad <- - exp(logEll) * numericalHypocrisyGradient(frX, exp(logEll), tauSq)
    KEnvir <- constructKEnvir(X, KType, hypers=list())
    for (Ri in 1:nReg) {
      idxs <- frList[[Ri]]$allPts
      grad[Ri] <- grad[Ri] + gradLogMLForSE(params=logEll[Ri], constParams=constParams,
        KEnvir=KEnvir, KType=KType, X=X[idxs], Y=Yg[idxs])
    }
    return (grad)
  }

  nTau <- length(tauSqVals)
  logEllList <- vector("list", nTau + 1)
  logEllBnds <- log(range(startingEll))
  names(logEllList) <- c("tau -> Inf", sprintf("tau = %.2e", tauSqVals))
  logEllList[["tau -> Inf"]] <- logEll
  for (i in 1:nTau) {
    # Optimize for this value of tauSq
    tauSq <- tauSqVals[i]

    cat(sprintf("Optimizing tauSq=%.2e (starting %s) ...", tauSq, quickTimestamp()))
    opt <- optim(par=logEll, fn=f, gr=g, method="L-BFGS-B",
      lower=logEllBnds[1], upper=logEllBnds[2],
      control=list(fnscale=-1, trace=0, maxit=400),
      # Extra parameters needed for fn and gr:
      X=X, Yg=Yg, tauSq=tauSq)
    cat(sprintf("done (%s)!\n", quickTimestamp()))

    # Save the results
    logEll <- opt$par
    logEllList[[sprintf("tau = %.2e", tauSq)]] <- logEll
    lastLogEllList <<- logEllList

    # Output a png-so-far
    png(filename=sprintf("testTauSq_N-%04d_La-%03d_tauSq-1e%s_%s_%s.png", nInt, La, pointToP(log(tauSq)/log(10)), label, TS), width=640, height=640)
    plot(type="l", col=1, spline(xout=X, x=frX, y=exp(logEllList[[1]])), ylim=1.1*c(0, max(exp(logEllList[[2]]))),
      xlab="Q (1/A)", ylab="ell(Q)", main=sprintf("Effect of tau^2 on ell(Q); %s", label))
    points(col=0, type="p", frX, exp(logEllList[[1]]))
    for (j in (1:i)+1) {
      points(type="l", col=j, spline(xout=X, x=frX, y=exp(logEllList[[j]])))
      points(type="p", col=j, frX, exp(logEllList[[j]]))
    }
    legend(x="topleft", legend=names(logEllList)[1:(i+1)], lty=rep(1, i+1), pch=rep(1, i+1), col=(0:i)+1)
    dev.off()
  }
  return (logEllList)
}

howBoutThemResiduals <- function(X, Y, YTrue, logEllList, sigmaFSq, nInt, La) {
  # A little setup
  frList <- focusRegionFamily(X=X, nInt=nInt, La=La)
  frX <- focusRegionXVals(X=X, frList=frList)
  KType="SEFullResEll"
  Yg <- PoissonToGauss(Y)
  YTrueG <- PoissonToGauss(YTrue)

  # Benchmark: AWS
  AWSG <- lpaws(Yg, ladjust=1)@theta[,1]
  AWSGP <- GaussToPoisson(AWSG)
  AWS <- lpaws(Y, ladjust=1)@theta[,1]
  AWSPG <- PoissonToGauss(AWS)

  # Now: calculate those resids and generate the figs!
  for (Ti in 1:length(logEllList)) {
    tauSq <- ifelse(Ti==1, Inf, 10^(-Ti))
    cat(sprintf("doing tau^2=%.2e, starting at %s\n", tauSq, quickTimestamp()))
    logEllInterp <- spline(method="natural", xout=X, x=frX, y=logEllList[[Ti]])$y
    hypers <- hypersForSEFullResEll(logEll=logEllInterp, sigmaFSq=sigmaFSq)
    KEnvir <- constructKEnvir(X=X, KType=KType, hypers=hypers)
    Ug <- smartKnnKInv(KEnvir, X, KType, hypers) %*% Yg
    U <- GaussToPoisson(Ug)
    cat(sprintf("MSRPG scores, tau^2=10^(%06.2f): AWS=%.5f, AWSSqrt=%.5f, Bayes=%.5f\n", log(tauSq)/log(10), MSR(YTrueG, AWSPG), MSR(YTrueG, AWSG), MSR(YTrueG, Ug)))
    cat(sprintf("MSR   scores, tau^2=10^(%06.2f): AWS=%.5f, AWSSqrt=%.5f, Bayes=%.5f\n", log(tauSq)/log(10), MSR(YTrue, AWS), MSR(YTrue, AWSGP), MSR(YTrue, U)))
    write.table(sep="\t", row.names=F, quote=F, signif(data.frame(
        Q=X,
        "True data"=YTrue,
        "Noisy data"=Y,
        "Anscombe transformed data"=Yg,
        "Bayesian"=U,
        "Anscombe transformed Bayesian"=Ug,
        "Anscombe transformed AWS"=AWSG,
        "AWS benchmark (improved)"=AWSGP,
        "AWS benchmark"=AWS
        ), 5), file=sprintf("residualsTest_%02d_%s_%s", Ti, pointToPSN(tauSq), quickTimestamp()))
  }
}

mapOutTauSq <- function(X, Y, YTrue, nInt, La, tauSqVals) {
  TS <- quickTimestamp()
  Yg <- PoissonToGauss(Y)
  # Divide datapoints into odd and even, used as "test" and "training"
  N <- length(X)
  iOdd <- seq(from=1, to=N, by=2)
  iEvn <- seq(from=2, to=N, by=2)
  YTrueG <- PoissonToGauss(YTrue)

  # Data structures: Reo=(R)esiduals of (e)ven trained on (o)dd, etc. ((t)rue)
  Reo <- Roe <- Ree <- Roo <- Rte <- Rto <- c()

  # Divide data into focus regions, and find optimal starting point for each region
  nIntSm <- nInt#round(nInt/2)
  LaSm <- La#round(La/2)
  frListOdd <- focusRegionFamily(X=X[iOdd], nInt=nIntSm, La=LaSm)
  frListEvn <- focusRegionFamily(X=X[iEvn], nInt=nIntSm, La=LaSm)
  frXOdd <- focusRegionXVals(X=X[iOdd], frList=frListOdd)
  frXEvn <- focusRegionXVals(X=X[iEvn], frList=frListEvn)
  OAFROdd <- optimizeAllFocusRegions(X=X[iOdd], Yg=Yg[iOdd], frList=frListOdd)
  OAFREvn <- optimizeAllFocusRegions(X=X[iEvn], Yg=Yg[iEvn], frList=frListEvn)
  sigmaFSqOdd <- OAFROdd$sigmaFSq
  sigmaFSqEvn <- OAFREvn$sigmaFSq
  logEllOdd <- log(OAFROdd$ell)
  logEllEvn <- log(OAFREvn$ell)
  logEllBndsOdd <- range(logEllOdd)
  logEllBndsEvn <- range(logEllEvn)
  constParamsOdd <- log(c(sigmaFSqOdd, 0.25))
  constParamsEvn <- log(c(sigmaFSqEvn, 0.25))
  names(constParamsOdd) <- c("logSigmaFSq", "logSigmaNSq")
  names(constParamsEvn) <- c("logSigmaFSq", "logSigmaNSq")

  # Fitness function: log(ML) - hypocrisy
  f <- function(logEll, tauSq, X, Yg, frList, frX) {
    names(logEll) <- rep("logEll", length(logEll))
    LML <- 0
    for (Ri in 1:length(frList)) {
      idxs <- frList[[Ri]]$allPts
      LML <- LML + logMLSE(params=logEll[Ri], constParams=constParams, KType=KType,
        X=X[idxs], Y=Yg[idxs])
    }
    hyp <- hypocrisyAndGradient(X=frX, ell=exp(logEll), tauSq=tauSq)$hyp
    return (LML - hyp)
  }
  # Gradient of fitness function
  g <- function(logEll, tauSq, X, Yg, frList, frX) {
    # Start out with the negative gradient of hypocrisy (want grad w.r.t.
    # log(ell), so premultiply through by ell)
    names(logEll) <- rep("logEll", length(logEll))
    grad <- - exp(logEll) * numericalHypocrisyGradient(frX, exp(logEll), tauSq)
    KEnvir <- constructKEnvir(X, KType, hypers=list())
    for (Ri in 1:length(frList)) {
      idxs <- frList[[Ri]]$allPts
      grad[Ri] <- grad[Ri] + gradLogMLForSE(params=logEll[Ri], constParams=constParams,
        KEnvir=KEnvir, KType=KType, X=X[idxs], Y=Yg[idxs])
    }
    return (grad)
  }

  # First, find the function that best denoises Yg(X).  What is its MSR when
  # evaluated at YOut(XOut)?
  calcResids <- function(X, Yg, hypers, KEnvir, XOut, YgOut) {
    Ug <- interpolatedFullResEllMatrix(X=X, hypers=hypers, XOut=XOut, KEnvir=KEnvir) %*% Yg
    return (MSR(YgOut, Ug))
  }

  # Now, calculate the residuals for each tauSq
  NC <- 7
  WIDTH <- 9
  cat(sprintf("|%9s|%9s|%9s|%9s|%9s|%9s|%9s| (Timestamp)\n%s\n", "log tauSq",
      "Roo", "Reo", "Rto", "Roe", "Ree", "Rte",
      paste(c(rep("=",NC+1), rep(rep("=",NC),WIDTH)), collapse="")))
  for (tauSq in tauSqVals) {
    # Code for ODD POINTS:
    opt <- optim(par=logEllOdd, fn=f, gr=g, method="L-BFGS-B",
      lower=logEllBndsOdd[1], upper=logEllBndsOdd[2],
      control=list(fnscale=-1, trace=0, maxit=400),
      # Extra parameters needed for fn and gr:
      X=X[iOdd], Yg=Yg[iOdd], tauSq=tauSq, frList=frListOdd, frX=frXOdd)
    logEllOdd <- opt$par
    logEllInterp <- spline(method="natural", xout=X[iOdd], x=frXOdd, y=logEllOdd)$y
    hypers <- hypersForSEFullResEll(logEll=logEllInterp, sigmaFSq=sigmaFSqOdd)
    KEnvir <- constructKEnvir(X=X[iOdd], KType="SEFullResEll", hypers=hypers)
    Roo <- c(Roo, calcResids(X=X[iOdd], Yg=Yg[iOdd], hypers=hypers, KEnvir=KEnvir, XOut=X[iOdd], YgOut=Yg[iOdd]))
    Reo <- c(Reo, calcResids(X=X[iOdd], Yg=Yg[iOdd], hypers=hypers, KEnvir=KEnvir, XOut=X[-iOdd], YgOut=Yg[-iOdd]))
    Rto <- c(Rto, calcResids(X=X[iOdd], Yg=Yg[iOdd], hypers=hypers, KEnvir=KEnvir, XOut=X, YgOut=YTrueG))
    # Code for EVEN POINTS (same as odd, but s/Odd/Evn/g):
    opt <- optim(par=logEllEvn, fn=f, gr=g, method="L-BFGS-B",
      lower=logEllBndsEvn[1], upper=logEllBndsEvn[2],
      control=list(fnscale=-1, trace=0, maxit=400),
      # Extra parameters needed for fn and gr:
      X=X[iEvn], Yg=Yg[iEvn], tauSq=tauSq, frList=frListEvn, frX=frXEvn)
    logEllEvn <- opt$par
    logEllInterp <- spline(method="natural", xout=X[iEvn], x=frXEvn, y=logEllEvn)$y
    hypers <- hypersForSEFullResEll(logEll=logEllInterp, sigmaFSq=sigmaFSqEvn)
    KEnvir <- constructKEnvir(X=X[iEvn], KType="SEFullResEll", hypers=hypers)
    Roe <- c(Roe, calcResids(X=X[iEvn], Yg=Yg[iEvn], hypers=hypers, KEnvir=KEnvir, XOut=X[iEvn], YgOut=Yg[iEvn]))
    Ree <- c(Ree, calcResids(X=X[iEvn], Yg=Yg[iEvn], hypers=hypers, KEnvir=KEnvir, XOut=X[-iEvn], YgOut=Yg[-iEvn]))
    Rte <- c(Rte, calcResids(X=X[iEvn], Yg=Yg[iEvn], hypers=hypers, KEnvir=KEnvir, XOut=X, YgOut=YTrueG))
    # Output results
    cat(sprintf("|%9.2f|%9.6f|%9.6f|%9.6f|%9.6f|%9.6f|%9.6f|%s\n", log(tauSq)/log(10),
        tail(Roo, 1), tail(Reo, 1), tail(Rto, 1), tail(Roe, 1), tail(Ree, 1), tail(Rte, 1),
        quickTimestamp()))
    write.table(sep="\t", row.names=F, quote=F, data.frame(tauSq=tauSqVals[1:length(Reo)], Reo=Reo, Roe=Roe, Ree=Ree, Roo=Roo, Rte=Rte, Rto=Rto), file=sprintf("tauSqMap_%s", TS))
  }

  return (data.frame(tauSq=tauSqVals, Reo=Reo, Roe=Roe, Ree=Ree, Roo=Roo, Rte=Rte, Rto=Rto))
}

interpolateVaryingEll <- function(X, ell, XOut, tolerance=0.01*min(ell)) {
  lastEllOut <- ellOut <- ellOutStart <- spline(x=X, xout=XOut, y=ell)$y
  KType <- "SEFullResEll"
  # Optimize sigmaFSq
  sigmaFSq <- bestSigmaFSqVaryingEll(X=X, Yg=ell, ell=ell, sigmaNSq=0)
  interpolations <- 0
  repeat {
    KEnvir <- constructKEnvir(X=XOut, KType=KType, hypers=hypersForSEFullResEll(logEll=log(ellOut),sigmaFSq=sigmaFSq))
    ellOut <- smartKnnKInv(KEnvir) %*% lastEllOut
    interpolations <- interpolations + 1
    changeSq <- (ellOut-lastEllOut)^2
    meanChange <- mean(changeSq)
    maxChange <- max(changeSq)
    cat(sprintf("Iteration %3d: Changed average of %.2e, max. of %.2e\n", interpolations, meanChange, maxChange))
    if (maxChange < tolerance) break
    lastEllOut <- ellOut
  }
  plot(XOut, ellOutStart, type="l", col="black", lwd=2)
  points(XOut, ellOut, type="l", col="red", lwd=1)
  return (ellOut)
}

denoisePoissonDataset <- function(X, Y, nInt, La, tauSq, YTrue=NULL, wordy=T, KEnvir=NULL) {
  reportComputationStart <- function(name, TS, width=19, wordy=T) {
    if (wordy) {
      fmtstring <- sprintf("\tStarted computing %%-%ds (%%s)\n", width)
      cat(sprintf(fmtstring, name, TS))
    }
  }
  Yg <- PoissonToGauss(Y)
  DateStamp <- quickTimestamp("%B %d, %Y")
  hypers <- TS.hypers.start <- TS.mean.start <- TS.sd.start <- TS.summary.start <- "NA"
  KType="SEFullResEll"
  if (is.null(KEnvir)) {
    # Divide into focus regions and find optimal hyperparameters
    frList <- focusRegionFamily(X=X, nInt=nInt, La=La)
    frX <- focusRegionXVals(X=X, frList=frList)
    TS.hypers.start <- quickTimestamp("%H:%M:%S")
    reportComputationStart("hyperparameters", TS.hypers.start)
    hypers <- optimalHypersFocusRegionsHypocrisy(X, Yg, frList, tauSq)

    # Invert matrix to find mean and standard deviation
    KEnvir <- constructKEnvir(X=X, KType=KType, hypers=hypers)
    TS.mean.start <- quickTimestamp("%H:%M:%S")
    reportComputationStart("mean function", TS.mean.start)
    Ug <- smartKnnKInv(KEnvir) %*% Yg
    TS.sd.start <- quickTimestamp("%H:%M:%S")
    reportComputationStart("covariance matrix", TS.sd.start)
    Cov <- smartKNoNoise(KEnvir) - smartKnnKInv(KEnvir, X, KType, hypers) %*% smartKNoNoise(KEnvir)
    save(KEnvir, file=sprintf("KEnvir.saved.%s", quickTimestamp()))
  }
  else {
    Ug <- smartKnnKInv(KEnvir) %*% Yg
    Cov <- smartKNoNoise(KEnvir) - smartKnnKInv(KEnvir) %*% smartKNoNoise(KEnvir)
  }
  sigma <- sqrt(diag(Cov))

  # Calculate benchmarks and output datafile
  TS.summary.start <- quickTimestamp("%H:%M:%S")
  reportComputationStart("summary statistics", TS.summary.start)
  AWS.naive <- lpaws(Y, ladjust=1)@theta[,1]
  AWS.Anscombe <- lpaws(Yg, ladjust=1)@theta[,1]
  library(msProcess)
  Wav.Ans <- msDenoiseWavelet(x=Yg, noise.variance=0.25)
  results <- data.frame(
    Q=X,
    noisy=Y,
    AWS.naive=AWS.naive,
    AWS.Anscombe=GaussToPoisson(AWS.Anscombe),
    Wavelets=GaussToPoisson(Wav.Ans),
    Bayes.mean=GaussToPoisson(Ug),
    Bayes.1sd.plus=GaussToPoisson(Ug+sigma),
    Bayes.1sd.minus=GaussToPoisson(Ug-sigma)
    )
  results.anscombe <- data.frame(
    Q=X,
    noisy=Yg,
    AWS.naive=PoissonToGauss(AWS.naive),
    AWS.Anscombe=AWS.Anscombe,
    Wavelets=Wav.Ans,
    Bayes.mean=Ug,
    Bayes.1sd=sigma
    )
  if (!is.null(YTrue)) {
    results <- cbind(results, data.frame(True=YTrue))
    results.anscombe <- cbind(results.anscombe, data.frame(True=PoissonToGauss(YTrue)))
  }
  TS <- quickTimestamp()
  write.table(sep="\t", row.names=F, quote=F, signif(results, 5), file=sprintf("denoised_%s", TS))
  write.table(sep="\t", row.names=F, quote=F, signif(results.anscombe, 5), file=sprintf("denoised_%s-Anscombe", TS))
  if (wordy) cat(sprintf("Ran starting on %s.\n", DateStamp))
  reportComputationStart("hyperparameters", TS.hypers.start)
  reportComputationStart("mean function", TS.mean.start)
  reportComputationStart("covariance matrix", TS.sd.start)
  reportComputationStart("summary statistics", TS.summary.start)
  return (list(Ug=Ug, sigma=sigma, hypers=hypers))
}

testCredibleIntervals <- function(Ug, YgTrue, sigma) {
  normedResids <- abs(Ug-YgTrue)/sigma
  sortedNR <- 2*sort(pnorm(normedResids)) - 1
  N <- length(Ug)
  expect <- (1:N)/N
  plot(expect, sortedNR, type="l", lwd=2)
  return (list(expected=expect, actual=sortedNR))
}

prepareToDenoiseIgorsData <- function(nInt=200, La=20) {
  tsv <- 10^(-(6:11))
  ellFunctions <- vector("list", 5)
  names(ellFunctions) <- c(800,400,100,40,10)
  ellFunctions[["800"]] <- effectOfTauSq(X=datapts800s$Q, Y=datapts800s$y, nInt=nInt, La=20, label="800 sec", tauSqVals=tsv)
  ellFunctions[["400"]] <- effectOfTauSq(X=datapts400s$Q, Y=datapts400s$y, nInt=nInt, La=20, label="400 sec", tauSqVals=tsv)
  ellFunctions[["100"]] <- effectOfTauSq(X=datapts100s$Q, Y=datapts100s$y, nInt=nInt, La=20, label="100 sec", tauSqVals=tsv)
  ellFunctions[["40"]] <- effectOfTauSq(X=datapts40s$Q, Y=datapts40s$y, nInt=nInt, La=20, label="40 sec", tauSqVals=tsv)
  ellFunctions[["10"]] <- effectOfTauSq(X=datapts10s$Q, Y=datapts10s$y, nInt=nInt, La=20, label="10 sec", tauSqVals=tsv)
  return (ellFunctions)
}

denoiseIgorsData <- function() {
  PAPERDATA.Igor.800s <<- denoisePoissonDataset(X=datapts800s$Q, Y=datapts800s$y, nInt=200, La=20, tauSq=1e-4, wordy=T)

}

illustrateGaussianProcess <- function(N=100, ellVals=c(0.2,2), sigmaFVals=c(1,10),
  X=seq(from=0, to=4, length.out=N), KType="SE", rSeed=4,
  YTrue=5+0.1*X+2*sigmaFVals[2]*exp(-0.5*((X-1)/(0.5*ellVals[1]))^2)) {
  set.seed(rSeed)
  # Construct environments for all the hyperparam combinations
  nHypers <- length(ellVals)*length(sigmaFVals)
  aPosteriori <- aPriori <- KEnvir <- vector("list", nHypers)
  hypers <- data.frame(ell=numeric(nHypers), sigmaFSq=numeric(nHypers), Ki=numeric(nHypers))
  i <- 1
  for (ell in ellVals) {
    for (sigmaF in sigmaFVals) {
      hypers$ell[i] <- ell
      hypers$sigmaFSq[i] <- sigmaF^2
      hypers$Ki[i] <- i
      KEnvir[[i]] <- constructKEnvir(X=X, KType=KType, hypers=hypersForSE(ell=ell, sigmaFSq=hypers$sigmaFSq[i]))
      i <- i + 1
    }
  }
  # Setup fit functions for the posterior (and its gradient)
  YNoise <- rpois(lambda=YTrue, n=N)
  fit <- function(Y, KEnvir) {
    logLike <- sum(dpois(x=YNoise, lambda=Y, log=T))
    logPrior <- -0.5*(Y %*% smartKnnInv(KEnvir) %*% Y)
    return (logPrior + logLike)
  }
  dFit <- function(Y, KEnvir) {
    return (YNoise/Y - 1 - smartKnnInv(KEnvir) %*% Y)
  }
  # Check that gradient really works
  searchDir <- rnorm(n=N)
  alpha <- 1
  Ystart <- pmax(1, rpois(n=N, lambda=YTrue))
  myGrad <- dFit(Y=Ystart, KEnvir=KEnvir[[1]])
  LPost <- fit(Y=pmax(1,Ystart), KEnvir=KEnvir[[1]])
  for (i in 1:10) {
    change <- (alpha*searchDir)
    actual <- fit(Y=pmax(1,Ystart+change), KEnvir=KEnvir[[1]]) - LPost
    expected <- sum(change * myGrad)
    cat(sprintf("Ratio: %8.6f (actual=%.2e)/(expected=%.2e)\n", actual/expected, actual, expected))
    alpha <- alpha / 10.0
  }
  # Solve the posterior for each
  for (i in 1:nHypers) {
    opt <- optim(par=pmax(YNoise, 1), method="L-BFGS-B", fn=fit, gr=dFit,
      lower=max(1,min(YNoise)), upper=max(YNoise),
      control=list(fnscale=-1, trace=0, maxit=400),
      # Extra parameters needed for fn and gr:
      KEnvir=KEnvir[[i]])
    aPosteriori[[i]] <- opt$par
    cat(sprintf("Finished optimizing #%d at %s\n", i, quickTimestamp()))
  }
  # Plot one draw from the prior for each combination (ell, sigmaF)
  randVec <- rnorm(n=N)
  for (i in 1:nHypers) {
    aPriori[[i]] <- t(smartKChol(KEnvir[[i]])) %*% randVec
  }
  myCol <- c("blue", "blue", "red", "red")
  myLty <- rep(c(2,1),2)
  png(filename=sprintf("example_prior_functions_%s.png", quickTimestamp("%Y-%m-%d")), width=800, height=600)
  yRange <- range( c(aPriori[[1]], aPriori[[2]], aPriori[[3]], aPriori[[4]], YNoise, YTrue,
      aPosteriori[[1]], aPosteriori[[2]], aPosteriori[[3]], aPosteriori[[4]]))
  plot(NULL, xlab="X", ylab="f(X)", xlim=range(X), main="Prior functions", ylim=yRange)
  abline(h=0)
  for (i in 1:nHypers)
    points(X, aPriori[[i]], type="l", col=myCol[i], lty=myLty[i], lwd=2)
  dev.off()
  # Plot posterior functions
  png(filename=sprintf("example_posterior_functions_%s.png", quickTimestamp("%Y-%m-%d")), width=800, height=600)
  plot(X, YNoise, col="black", xlab="X", ylab="f(X)", lwd=2, xlim=range(X), main="Posterior functions", ylim=yRange)
  abline(h=0)
  #points(type="l", X, YTrue, lwd=2)
  for (i in 1:nHypers)
    points(X, aPosteriori[[i]], type="l", col=myCol[i], lty=myLty[i], lwd=2)

  dev.off()
}

calcSomeResids <- function(X, Y, hypers, KType, N=1000, YTrue=NULL) {
  writeEvery <- 20
  KEnvir <- constructKEnvir(X=X, KType=KType, hypers=hypers)
  residsTrue <- resids <- 0*(1:N)
  Yg <- PoissonToGauss(Y)
  YTrueG <- PoissonToGauss(YTrue)
  Ug <- smartKnnKInv(KEnvir) %*% Yg
  for (i in 1:N) {
    URand <- Ug + smartL(KEnvir) %*% rnorm(n=length(X))
    resids[i] <- MSR(URand, Yg)
    if (!is.null(YTrue))
      residsTrue[i] <- MSR(URand, YTrueG)
    if (i %% writeEvery == 0)
      write.table(sep="\t", row.names=F, quote=F, signif(data.frame(noisy=resids, true=residsTrue), 5), file="latestResids")
  }
  x <- data.frame(noisy=resids)
  if (!is.null(YTrue))
    x <- cbind(x, data.frame(true=residsTrue))
  return (x)
}

testTrainEvenOddWavelets <- function(X, Y) {
  Yg <- PoissonToGauss(Y)
  N <- length(X)
  iOdd <- seq(from=1, to=N, by=2)
  iEvn <- seq(from=2, to=N, by=2)

  # Wavelets (Anscombe) for comparison
  library(msProcess)
  WavgOdd <- msDenoiseWavelet(Yg[iOdd], noise.variance=0.25)
  WavgEvnOdd <- 0.5*(WavgOdd[-1] + WavgOdd[-length(WavgOdd)])
  iEvnOdd <- 0.5*(iOdd[-1] + iOdd[-length(iOdd)])
  WavgEvn <- msDenoiseWavelet(Yg[iEvn], noise.variance=0.25)
  WavgOddEvn <- 0.5*(WavgEvn[-1] + WavgEvn[-length(WavgEvn)])
  iOddEvn <- 0.5*(iEvn[-1] + iEvn[-length(iEvn)])
  Wavg.odd.from.evn <- MSR(WavgOddEvn, Yg[iOddEvn])
  Wavg.evn.from.evn <- MSR(WavgEvn, Yg[iEvn])
  Wavg.odd.from.odd <- MSR(WavgOdd, Yg[iOdd])
  Wavg.evn.from.odd <- MSR(WavgEvnOdd, Yg[iEvnOdd])
  Wavg <- data.frame(
    Wavg.odd.from.evn=Wavg.odd.from.evn,
    Wavg.evn.from.evn=Wavg.evn.from.evn,
    Wavg.odd.from.odd=Wavg.odd.from.odd,
    Wavg.evn.from.odd=Wavg.evn.from.odd)
  write.table(sep="\t", row.names=FALSE, quote=FALSE, signif(Wavg, 5), file="testTrainEvenOddWav")
  return (Wavg)

}

testTrainEvenOdd <- function(X, Y, hypers, KType, nDraws=1000, K.nn=get(sprintf("GramMatrix%s", KType))(X=X, hypers=hypers)) {
  writeEvery <- 20
  Yg <- PoissonToGauss(Y)
  N <- length(X)
  iOdd <- seq(from=1, to=N, by=2)
  iEvn <- seq(from=2, to=N, by=2)

  # AWS (Anscombe) for comparison
  AWSgOdd <- lpaws(Yg[iOdd], ladjust=1)@theta[,1]
  AWSgEvnOdd <- 0.5*(AWSgOdd[-1] + AWSgOdd[-length(AWSgOdd)])
  iEvnOdd <- 0.5*(iOdd[-1] + iOdd[-length(iOdd)])
  AWSgEvn <- lpaws(Yg[iEvn], ladjust=1)@theta[,1]
  AWSgOddEvn <- 0.5*(AWSgEvn[-1] + AWSgEvn[-length(AWSgEvn)])
  iOddEvn <- 0.5*(iEvn[-1] + iEvn[-length(iEvn)])
  AWSg.odd.from.evn <- MSR(AWSgOddEvn, Yg[iOddEvn])
  AWSg.evn.from.evn <- MSR(AWSgEvn, Yg[iEvn])
  AWSg.odd.from.odd <- MSR(AWSgOdd, Yg[iOdd])
  AWSg.evn.from.odd <- MSR(AWSgEvnOdd, Yg[iEvnOdd])

  # AWS (raw) for comparison
  AWSOdd <- lpaws(Y[iOdd], ladjust=1)@theta[,1]
  AWSEvnOdd <- 0.5*(AWSOdd[-1] + AWSOdd[-length(AWSOdd)])
  iEvnOdd <- 0.5*(iOdd[-1] + iOdd[-length(iOdd)])
  AWSEvn <- lpaws(Y[iEvn], ladjust=1)@theta[,1]
  AWSOddEvn <- 0.5*(AWSEvn[-1] + AWSEvn[-length(AWSEvn)])
  iOddEvn <- 0.5*(iEvn[-1] + iEvn[-length(iEvn)])
  AWS.odd.from.evn <- MSR(PoissonToGauss(AWSOddEvn), Yg[iOddEvn])
  AWS.evn.from.evn <- MSR(PoissonToGauss(AWSEvn), Yg[iEvn])
  AWS.odd.from.odd <- MSR(PoissonToGauss(AWSOdd), Yg[iOdd])
  AWS.evn.from.odd <- MSR(PoissonToGauss(AWSEvnOdd), Yg[iEvnOdd])

  resids <- cbind(data.frame(
        AWS.odd.from.evn=AWS.odd.from.evn,
        AWS.odd.from.odd=AWS.odd.from.odd,
        AWS.evn.from.evn=AWS.evn.from.evn,
        AWS.evn.from.odd=AWS.evn.from.odd,
        AWSg.odd.from.evn=AWSg.odd.from.evn,
        AWSg.odd.from.odd=AWSg.odd.from.odd,
        AWSg.evn.from.evn=AWSg.evn.from.evn,
        AWSg.evn.from.odd=AWSg.evn.from.odd
        ), testTrainEvenOddWavelets(X=X, Y=Y))
  write.table(sep="\t", row.names=FALSE, quote=FALSE, signif(resids, 5), file="testTrainEvenOddAWS")

  # Bayesian (mean) for comparison...
  # First, set up odd and even versions of the variables we need
  K <- K.nn + 0.25*diag(nrow(K.nn))
  KInv.odd <- solve(K[iOdd, iOdd])
  KInv.evn <- solve(K[iEvn, iEvn])
  KInv <- solve(K)
  Ugo <- K.nn[,iOdd] %*% KInv.odd %*% Yg[iOdd]
  Uge <- K.nn[,iEvn] %*% KInv.evn %*% Yg[iEvn]
  II <- diag(nrow(K.nn))
  L.odd <- t(chol(K.nn - K.nn[,iOdd]%*%KInv.odd%*%K.nn[iOdd,] + 0.25e-4*II))
  L.evn <- t(chol(K.nn - K.nn[,iEvn]%*%KInv.evn%*%K.nn[iEvn,] + 0.25e-4*II))

  resids <- cbind(resids, data.frame(
      Bayes.mean.odd.from.odd=MSR(Ugo[iOdd], Yg[iOdd]),
      Bayes.mean.evn.from.odd=MSR(Ugo[iEvn], Yg[iEvn]),
      Bayes.mean.odd.from.evn=MSR(Uge[iOdd], Yg[iOdd]),
      Bayes.mean.evn.from.evn=MSR(Uge[iEvn], Yg[iEvn])))
  write.table(sep="\t", row.names=F, quote=F, signif(resids,5), file="testTrainEvenOddAWS")

  write.table(sep="\t", row.names=F, quote=F, signif(data.frame(Q=X, noisy=Y, odd=GaussToPoisson(Ugo), evn=GaussToPoisson(Uge)), 5), file="oddAndEven")

  odd.from.evn <- odd.from.odd <- evn.from.odd <- evn.from.evn <- NA * (1:nDraws)
  for (i in 1:nDraws) {
    rand <- rnorm(n=length(X))
    UgoRand <- Ugo + L.odd %*% rand
    UgeRand <- Uge + L.evn %*% rand
    odd.from.odd[i] <- MSR(UgoRand[iOdd], Yg[iOdd])
    evn.from.odd[i] <- MSR(UgoRand[iEvn], Yg[iEvn])
    odd.from.evn[i] <- MSR(UgeRand[iOdd], Yg[iOdd])
    evn.from.evn[i] <- MSR(UgeRand[iEvn], Yg[iEvn])
    if (i %% writeEvery == 0) {
      write.table(sep="\t", row.names=F, quote=F, signif(data.frame(
            odd.from.evn=odd.from.evn,
            odd.from.odd=odd.from.odd,
            evn.from.evn=evn.from.evn,
            evn.from.odd=evn.from.odd
            ), 5), file="testTrainEvenOdd")
    }
  }
  return (data.frame(
            odd.from.evn=odd.from.evn,
            odd.from.odd=odd.from.odd,
            evn.from.evn=evn.from.evn,
            evn.from.odd=evn.from.odd))
}

makeGaussianBlurs <- function(cVals, X, Y, Xout=X, ell) {
  i <- 1
  qG <<- list()
  for (CV in cVals) {
    blurred <- quickGaussianBlur(X=X, Y=Y, Xout=X, Sigma=CV*ell)
    qG <<- c(qG, list(blurred))
    points(X, blurred, type="l", col=i)
    i <- i + 1
  }
}

gaussTestTrain <- function(X, Y, ell, YTrue, cVals) {
  writeEvery <- 20
  Yg <- PoissonToGauss(Y)
  YgTrue <- PoissonToGauss(YTrue)
  N <- length(X)
  iOdd <- seq(from=1, to=N, by=2)
  iEvn <- seq(from=2, to=N, by=2)

  tru.from.evn <- tru.from.odd <- odd.from.evn <- odd.from.odd <- evn.from.odd <- evn.from.evn <- NA * cVals

  for (i in 1:length(cVals)) {
    cv <- cVals[i]
    # Blur each function by c*ell
    odd.blurred <- quickGaussianBlur(X=X[iOdd], Xout=X, Y=Yg[iOdd], Sigma=cv*ell[iOdd])
    evn.blurred <- quickGaussianBlur(X=X[iEvn], Xout=X, Y=Yg[iEvn], Sigma=cv*ell[iEvn])
    # Calc resids
    odd.from.evn[i] <- MSR(evn.blurred[iOdd], Yg[iOdd])
    odd.from.odd[i] <- MSR(odd.blurred[iOdd], Yg[iOdd])
    evn.from.evn[i] <- MSR(evn.blurred[iEvn], Yg[iEvn])
    evn.from.odd[i] <- MSR(odd.blurred[iEvn], Yg[iEvn])
    tru.from.evn[i] <- MSR(evn.blurred, YgTrue)
    tru.from.odd[i] <- MSR(odd.blurred, YgTrue)
  }

  return (data.frame(
            odd.from.evn=odd.from.evn,
            odd.from.odd=odd.from.odd,
            evn.from.evn=evn.from.evn,
            evn.from.odd=evn.from.odd,
            tru.from.evn=tru.from.evn,
            tru.from.odd=tru.from.odd))
}

dctMatrices <- function(N) {
  AInv <- A <- matrix(0, nrow=N, ncol=N)
  for (Col in 1:N) {
    e <- rep(0,N)
    e[Col] <- 1
    A   [,Col] <- dct(x=e, variant=2, inverted=F)
    AInv[,Col] <- dct(x=e, variant=2, inverted=T)
  }
  return (list(dct=A, dctInv=AInv))
}

# Return an orthogonal matrix corresponding to a Haar wavelet change of basis
# for N datapoints.  In other words, the weight vector w is given as w = My
haarMatrix <- function(N) {
  i <- 2
  sqrt2 <- sqrt(2)
  M <- matrix(c(1,1,1,-1)/sqrt2, nrow=2, ncol=2)
  while (i < N) {
    i <- i * 2
    Mnew <- matrix(0, nrow=i, ncol=i)
    iOdd <- seq(from=1, to=i, by=2)
    iEvn <- seq(from=2, to=i, by=2)
    iHalf <- 1:(i/2)
    Mnew[iOdd,iHalf] <- Mnew[iEvn,iHalf] <- M
    M <- Mnew/sqrt(2)
    # Now, populate the right side of the matrix
    M[1:i, (1:i)[-iHalf]] <- matrix(c(1,-1,rep(c(rep(0,i),1,-1),i/2-1))/sqrt2, nrow=i, ncol=i/2)
  }
  return (t(M)[,1:N])
}

GPBasisMatrix <- function(X, ell) {
  XMid <- mean(range(X))
  nCtrl <- floor(diff(range(X))/ell) + 1
  XCtrl <- XMid+ell*((1:nCtrl) - (nCtrl+1)/2)
  indices <- sapply(X=XCtrl, FUN=indexOfClosestValue, myVec=X)
  toCOB <- matrix(0, nrow=nCtrl, ncol=length(X))
  toCOB[cbind(1:length(indices),indices)] <- 1
  hypers <- hypersForSE(sigmaFSq=1, sigmaNSq=0, ell=ell)
  KEnvir <- constructKEnvir(X=X[indices], KType="SE", hypers=hypers)
  fromCOB <- GramMatrixSE(X=X[indices], hypers=hypers, XOut=X) %*% smartKInv(KEnvir)
  return (list(toCOB=toCOB, fromCOB=fromCOB))
}

# Find the full-resolution ell(Q) with hypocrisy prior
fullResEllHypocrisyPrior <- function(X, Y, startEll=mean(ellBounds), sigmaFSq, tauSq, ellBounds, writePics=T, YTrue=NULL) {
  # Setup hyperparameters
  N <- length(X)
  Yg <- PoissonToGauss(Y)
  logEll <- rep(log(startEll),N)
  hypers <- hypersForSEFullResEll(logEll=logEll, sigmaFSq=sigmaFSq)
  KType <- "SEFullResEll"
  KEnvir <- constructKEnvir(X=X, KType=KType, hypers=hypers)
  i <- 0
  secBase <- as.numeric(quickTimestamp("%s"))
  TS <- quickTimestamp()

  # Setup matrix for change-of-basis (this makes the average value of log(ell)
  # its own knob to tweak, which should lead to more efficient search
  # directions)
  ones <- rep(1,N)
  #M <- rbind(N*diag(N)-1, ones, deparse.level=0)/N
  #MInv <- cbind(diag(N), ones, deparse.level=0)
  #Ms <- dctMatrices(N)
  #M <- Ms$dct
  #MInv <- Ms$dctInv
  #M <- haarMatrix(N)
  #MInv <- t(M)
  mymats <- GPBasisMatrix(X=X, ell=0.025)
  M <- mymats$toCOB
  MInv <- mymats$fromCOB


  logEllCOB <- M %*% logEll # [C]hange [O]f [B]asis

  # Progress-plotting function
  plotProgress <- function() {
    if (!writePics) return()
    elapsed <- as.numeric(quickTimestamp("%s")) - secBase
    png(filename=sprintf("FRLH_%s_tauSq-%s_%05d.png", TS, pointToPSN(tauSq, 1), i), width=800, height=800)
    par(mfrow=c(2,1))
    # First plot: ell(Q)
    plot(xlab=NA, ylab="ell", main=sprintf("Full-res ell(Q), tauSq=%.2e, t=%d sec\n%s", tauSq, elapsed, TS), type="l", col="black", X, exp(logEll))
    # Second plot: function
    plot(main="Corresponding function fit", xlab="Q (1/A)", ylab="Counts", type="l", col="gray", X, Y)
    points(type="l", col="blue", X, GaussToPoisson(smartKnnKInv(KEnvir) %*% Yg))
    if (!is.null(YTrue)) points(type="l", col="purple", X, YTrue)
    dev.off()
    i <<- i + 1
  }
  # Fitness function: log(ML) + hypocrisy
  fit <- function(logEllCOB) {
    logEll <<- MInv %*% logEllCOB
    hypers$logEll <<- logEll
    LML <- logML(hypers=hypers, KType=KType, X=X, Y=Yg, KEnvir=KEnvir)
    hyp <- hypocrisyAndGradient(X=X, ell=exp(logEll), tauSq=tauSq)
    LASTG <<- hyp$grad
    plotProgress()
    return (LML - hyp$hyp)
  }
  # Gradient of fitness function
  gFit <- function(logEllCOB) {
    logEll <<- MInv %*% logEllCOB
    hypers$logEll <<- logEll
    gLML <- matrix(nrow=N, ncol=1)
    gLML[,1] <- gradLogMLForSEFullResEll(hypers=hypers, KType=KType, X=X, Y=Yg, KEnvir=KEnvir)
    return (t(gLML - LASTG) %*% MInv)
  }

  logBnds <- log(ellBounds)
  diffLimits <- diff(logBnds)
  opt <- optim(par=logEllCOB, method="L-BFGS-B", fn=fit, gr=gFit,
    #lower=logBnds[1], upper=logBnds[2],
    #lower=c(rep(-diffLimits,N),logBnds[1]), upper=c(rep(diffLimits,N),logBnds[2]),
    #lower=N*c(logBnds[1], rep(-diffLimits, N-1)), upper=N*c(logBnds[2], rep(diffLimits, N-1)),
    #lower=c(logBnds[1], rep(-diffLimits, nrow(M)-1)), upper=c(logBnds[2], rep(diffLimits, nrow(M)-1)),
    lower=rep(logBnds[1],N), upper=rep(logBnds[2],N),
    control=list(fnscale=-1, trace=0, maxit=400))

  logEll <- MInv %*% opt$par
  return (exp(logEll))
}

divAndConqFullResEll <- function(nPts, X, Y, ellBounds, sigmaFSq, tauSq, YTrue=NULL) {
  TS <- quickTimestamp()
  N <- length(X)
  Yg <- PoissonToGauss(Y)
  # Divide into overlapping regions
  numRegions <- 2*round(N/nPts)-1
  n <- round(N/round(N/nPts)) # n is like "nPts, stretched to fit N"
  starts <- round(seq(from=1, to=N-n+1, length.out=numRegions))
  ell <- ii <- vector("list", length=numRegions)
  secBase <- as.numeric(quickTimestamp("%s"))
  # Fit each region
  allEll <- ellBounds
  cat("\n\n\n\nRemember: this is shortcut and must be fixed!\n\n\n\n")
  for (j in 3:length(ii)) {
    i <- ii[[j]] <- seq(from=starts[j], length.out=n)
    ell[[j]] <- fullResEllHypocrisyPrior(X=X[i], Y=Y[i], sigmaFSq=sigmaFSq, tauSq=tauSq, ellBounds=ellBounds, writePics=T, YTrue=YTrue[i])
    elapsed <- as.numeric(quickTimestamp("%s")) - secBase
    # Plot progress
    xbnds <- range(c(X[1], X[i]))
    png(filename=sprintf("divAndConq_tauSq-%s_n%05d_%s.png", pointToPSN(tauSq, 1), n, TS), width=800, height=600)
    allEll <- c(allEll, ell[[j]])
    plot(NULL, main=sprintf("Snippets of full-res ell(Q); tauSq=%.2e; %s; tot. elapsed=%d sec", tauSq, TS, elapsed), xlab="Q (1/A)", ylab="ell(Q) (1/A)", xlim=xbnds, ylim=range(allEll))
    for (k in 1:j)
      points(type="l", col=2*(k%%2)+2, X[ii[[k]]], ell[[k]])
    abline(h=ellBounds[1])
    abline(h=ellBounds[2])
    dev.off()
  }
}

compareGPtoSOG <- function(nSects=8, Density=20, nSlide=4) {
  TS <- quickTimestamp()
  ctrl <- (0:nSects)/nSects
  nPtsMinus1 <- nSects*Density
  X <- (0:nPtsMinus1)/nPtsMinus1
  sigma=1/nSects
  # Construct matrices for GP and SOG
  hypers <- hypersForSE(sigmaFSq=1, sigmaNSq=0, ell=sigma)
  KEnvir <- constructKEnvir(X=ctrl, KType="SE", hypers=hypers)
  GPmat <- GramMatrixSE(X=ctrl, hypers=hypers, XOut=X) %*% smartKInv(KEnvir)
  SOGmat <- outer(X, ctrl, function(a,b){return(exp(-0.5*((a-b)/sigma)^2))})
  # See how they do approximating the function
  for (i in 0:nSlide) {
    Xc <- (round(nSects/2) + i/(2*nSlide))/nSects
    Y <- 1/(1+((X-Xc)/(0.5*sigma))^2)
    optGP <- optim(par=rep(0,length(ctrl)), method="L-BFGS-B",
      fn=function(Z){return(MSR(Y, GPmat%*%Z))},
      lower=rep(-10,length(ctrl)), upper=rep(10,length(ctrl)),
      control=list(fnscale=1, trace=0, maxit=400))
    optSOG <- optim(par=rep(0,length(ctrl)), method="L-BFGS-B",
      fn=function(Z){return(MSR(Y, SOGmat%*%Z))},
      lower=rep(-10,length(ctrl)), upper=rep(10,length(ctrl)),
      control=list(fnscale=1, trace=0, maxit=400))
    png(filename=sprintf("compareGPtoSOG_%s_%03d.png", TS, i), width=800, height=600)
    plot(main=sprintf("Iteration %d of %d", i, nSlide), type="l", X, Y, col="black")
    points(col="blue", type="l", lwd=2, X, GPmat %*% optGP$par)
    points(col="blue", type="p", ctrl, optGP$par)
    points(col="red", type="l", X, SOGmat %*% optSOG$par)
    points(col="red", type="p", ctrl, optSOG$par)
    dev.off()
  }
}

# ellDipHints: a data.frame giving boundaries for non-flat regions
# maxFlatPoints: the most extra points to include on either side of a non-flat
# region.  The goal of including these points is to help find the best location
# of the boundary.  I hope that having more flat-region points will tend to
# shrink the non-flat region size, which counteracts the tendency of curvature
# in the data to grow the non-flat region size.
manualSpikyOnFlat <- function(X, Yp, ellDipHints, maxFlatPoints=80, bHCEG=NULL, all.params=NULL, max.change.factor=2) {
  KType <- "SEFullResEll"
  TS <<- quickTimestamp()
  N <- length(X)
  Yg <- PoissonToGauss(Yp)

  # Temporary: just set g to a given value
  g <- 4*max(abs(diff(X)))
  sigmaFSq <- diff(range(Yg))^2

  # Partition into regions, which correspond to potential non-flat areas
  # identified in ellDipHints, PLUS an extension into the "flat" region, which
  # goes on each side for either ''maxFlatPoints'' points, OR until it hits the
  # next region.
  regions <- bareRegions <- tightRegions <- vector("list", nrow(ellDipHints))
  for (i in 1:nrow(ellDipHints)) {
    bareRegions[[i]] <- indicesInRange(v=X, Min=ellDipHints[i,1], Max=ellDipHints[i,4])
    tightRegions[[i]] <- indicesInRange(v=X, Min=ellDipHints[i,2], Max=ellDipHints[i,3])
  }
  for (i in 1:nrow(ellDipHints)) {
    extensionL <- max(1, bareRegions[[i]][1] - maxFlatPoints, ifelse(i > 1, tail(bareRegions[[i-1]],1), 1))
    extensionR <- min(N, tail(bareRegions[[i]],1) + maxFlatPoints, ifelse(i < nrow(ellDipHints), head(bareRegions[[i+1]],1), N))
    regions[[i]] <- extensionL:extensionR
  }

  # Solve a constant-ell GP in the flat regions
  flatIndices <- (1:N)[-unlist(bareRegions)]
  if (is.null(bHCEG)) {
    bHCEG <- bestHypersConstantEllGaussian(X=X[flatIndices], Y=Yg[flatIndices])
  }
  hypers <- list(bgr.ell=bHCEG$ell, bgr.sigma.f.sq=bHCEG$sigmaFSq)
  # Free up all that RAM!
  rm(bHCEG)
  gc()  # call garbage collector

  # Setup variables to store our results
  ellBnds <- c(min(abs(diff(X))), hypers$bgr.ell)
  params <- c(0, 0, 0, mean(ellBnds))
  if (is.null(all.params)) {
    all.params <- matrix(NA, nrow=nrow(ellDipHints), ncol=length(params))
    colnames(all.params) <- names(params) <- c("sigmaFSq", "xL", "xR", "ell")
    all.params[,2:3] <- as.matrix(0.5*(ellDipHints[,c(1,3)] + ellDipHints[,c(2,4)]))
    all.params[,4] <- rep(exp(mean(log(ellBnds))), nrow(all.params))

    # Best guess for each region: solve with const-params in ONLY that region
    constParams <- log(0.25)
    names(constParams) <- "logSigmaNSq"
    fit <- function(params, X, Y) {
      hypers <- hypersFromParamsSE(params=params, constParams=constParams)
      LML <- logML(hypers=hypers, KType="SE", X=X, Y=Y)
      return (LML)
    }
    grad.fit <- function(params, X, Y) {
      hypers <- hypersFromParamsSE(params=params, constParams=constParams)
      grad.LML <- gradLogMLForSE(hypers=hypers, params=params, X=X, Y=Y)
      return (grad.LML)
    }
    for (i in 1:nrow(ellDipHints)) {
      cat(sprintf("Starting region %d...", i))
      # Setup initial values
      params <- c(log(all.params[i,"ell"]), 2*log(diff(range(Yg[tightRegions[[i]]]))))
      names(params) <- c("logEll", "logSigmaFSq")
      lowerLims <- c(log(ellBnds[1]), -Inf)
      upperLims <- c(log(ellBnds[2]), params["logSigmaFSq"] + log(5))
      # Optimize
      opt <- optim(par=params, method="L-BFGS-B",
        fn=fit, gr=grad.fit, lower=lowerLims, upper=upperLims,
        control=list(fnscale=-1, trace=0, maxit=400),
        X=X[tightRegions[[i]]], Y=Yg[tightRegions[[i]]])
      # Record best values
      all.params[i,1] <- exp(opt$par["logSigmaFSq"])
      all.params[i,4] <- exp(opt$par["logEll"])
      cat(sprintf("done!\n"))
    }
    simplisticGuesses <<- all.params
  }

  # Optimize the boundaries
  fit <- function(params, X, Y, K.long) {
    # BOILERPLATE TO CALCULATE K-MATRICES
    g <- params["ell"]
    K.shrt <- GramMatrixSE(X=X, hypers=hypersForSE(ell=params["ell"], sigmaFSq=params["sigmaFSq"]))
    X.xL.scaled <- (X-params["xL"])/g
    X.xR.scaled <- (X-params["xR"])/g
    mask.vals <- 0.5*(s(X.xL.scaled) - s(X.xR.scaled))
    K.mask <- outer(mask.vals, mask.vals)
    K.both <- K.long + K.mask*K.shrt
    noise <- 0.25*diag(length(X))
    K.inv <- solve(K.both + noise)
    myImagePlot(log(K.both+noise))
    K.chol <- t(chol(K.both + noise))

    # Calculations:
    LML <- -0.5*t(Y) %*% K.inv %*% Y - sum(log(diag(K.chol))) - 0.5*length(Y)*log(2*pi)
    return (LML)
  }
  grad.fit <- function(params, X, Y, K.long) {
    # BOILERPLATE TO CALCULATE K-MATRICES
    g <- params["ell"]
    K.shrt <- GramMatrixSE(X=X, hypers=hypersForSE(ell=params["ell"], sigmaFSq=params["sigmaFSq"]))
    X.xL.scaled <- (X-params["xL"])/g
    X.xR.scaled <- (X-params["xR"])/g
    mask.vals <- 0.5*(s(X.xL.scaled) - s(X.xR.scaled))
    K.mask <- outer(mask.vals, mask.vals)
    K.both <- K.long + K.mask*K.shrt
    noise <- 0.25*diag(length(X))
    K.inv <- solve(K.both + noise)
    K.chol <- t(chol(K.both + noise))

    # Additional helpers for gradient
    dQOverEllSq <- genericXDistanceSq(X)/(params["ell"])^2
    d.mask.dxL <- -0.5*ds.1(X.xL.scaled)/g
    d.mask.dxR <-  0.5*ds.1(X.xR.scaled)/g
    w.L <- outer(mask.vals, d.mask.dxL)
    w.R <- outer(mask.vals, d.mask.dxR)
    d.mask.g <- -0.5*(X.xL.scaled*ds.1(X.xL.scaled) - X.xR.scaled*ds.1(X.xR.scaled))/g
    z <- outer(mask.vals, d.mask.g)

    # Calculations:
    alpha <- K.inv %*% Y
    matFactor <- alpha %*% t(alpha) - K.inv
    grad <- params # Copy over the "names" structure
    #grad["ell"] <- 0.5 * smartTrace(matFactor, K.mask*K.shrt*dQOverEllSq)
    grad["ell"] <- 0.5 * smartTrace(matFactor, K.shrt*(K.mask*dQOverEllSq/params["ell"] + z + t(z)))
    grad["sigmaFSq"] <- 0.5 * smartTrace(matFactor, K.mask*K.shrt/params["sigmaFSq"])
    grad["xL"] <- 0.5*smartTrace(matFactor, K.shrt*(w.L + t(w.L)))
    grad["xR"] <- 0.5*smartTrace(matFactor, K.shrt*(w.R + t(w.R)))

    return (grad)
  }
  for (i in 1:nrow(ellDipHints)) {
    Xi <- X [regions[[i]]]
    Yi <- Yg[regions[[i]]]
    K.long <- GramMatrixSE(X=Xi, hypers=hypersForSE(ell=hypers$bgr.ell,
        sigmaFSq=hypers$bgr.sigma.f.sq))
    params <- all.params[i,]
    lower <- upper <- params # Copy "names" structure
    lower[c("ell", "sigmaFSq")] <- params[c("ell", "sigmaFSq")] / max.change.factor
    upper[c("ell", "sigmaFSq")] <- params[c("ell", "sigmaFSq")] * max.change.factor
    lower[c("xL","xR")] <- ellDipHints[i,c(1,3)]
    upper[c("xL","xR")] <- ellDipHints[i,c(2,4)]
    old.LML <- fit(params=params, X=Xi, Y=Yi, K.long=K.long)
    opt <- optim(par=params, method="L-BFGS-B",
      fn=fit, gr=grad.fit,
      lower=lower, upper=upper,
      control=list(fnscale=-1, trace=0, maxit=400),
      X=Xi, Y=Yi, K.long=K.long)
    new.LML <- fit(params=opt$par, X=Xi, Y=Yi, K.long=K.long)
    cat(sprintf("Region %d improved from %f to %f.\n", i, old.LML, new.LML))
    all.params[i,] <- opt$par
  }
  hypers$peak.regions <- all.params
  return (hypers)
}

# We assume all these functions have the same ell, and the same sigmas up to a
# constant.  Fit 'em all simultaneously!  Output a datafile with raw and fitted
# datapoints, and also a PNG showing the outputs graphed on top of each other.
fitVictorsData <- function(dataList) {
  # Fit the first dataset to get good starting values.  Later, we'll assume the
  # values we get couldn't be off by more than a factor of 2 (to speed up the
  # convergence search when we fit all simultaneously)
  testData <- dataList[[1]]
  J <- which(testData[,2]>0)
  startVals <- GPGaussianGenericMaxML(X=testData[J,1], Y=sqrt(testData[J,2]), sigmaNSq=0.187, constNoise=F, wordy=T, doGrid=F, label="testingSet")
  margin <- 1.5

  # Find "nicer" data: ignore zero-values, and take the square root to turn
  # Poisson into Gauss.
  nicerData <- vector("list", length=length(dataList))
  logSigmaScales <- vector(length=length(dataList))
  maxLen <- 0
  for (K in 1:length(dataList)) {
    J <- which(dataList[[K]][,2]>0)
    nicerData[[K]] <- dataList[[K]][J,]
    nicerData[[K]][,2] <- sqrt(nicerData[[K]][,2])
    logSigmaScales[K] <- log(max(nicerData[[K]][,2]))
    maxLen <- max(maxLen, length(J))
  }
  best <- startVals
  best["logSigmaFSq"] <- best["logSigmaFSq"] - logSigmaScales[1]
  best["logSigmaNSq"] <- best["logSigmaNSq"] - logSigmaScales[1]

  # Specialized versions of logML and gradient, which let us fit everything at once
  KType <- "SE"
  fit <- function(params) {
    logML <- 0
    for (K in 1:length(dataList)) {
      p <- params
      p["logSigmaNSq"] <- p["logSigmaNSq"] + logSigmaScales[K]
      p["logSigmaFSq"] <- p["logSigmaFSq"] + logSigmaScales[K]
      logML <- logML + logMLSE(params=p, KType=KType,
        X=nicerData[[K]][,1], Y=nicerData[[K]][,2])
    }
    return (logML)
  }
  gFit <- function(params) {
    gLML <- 0
    for (K in 1:length(dataList)) {
      p <- params
      p["logSigmaNSq"] <- p["logSigmaNSq"] + logSigmaScales[K]
      p["logSigmaFSq"] <- p["logSigmaFSq"] + logSigmaScales[K]
      gLML <- gLML + gradLogMLForSE(params=p, KType=KType,
        X=nicerData[[K]][,1], Y=nicerData[[K]][,2])
    }
    return (gLML)
  }

  opt <- optim(par=best, fn=fit, gr=gFit, method="L-BFGS-B",
    control=list(fnscale=-1, trace=0, maxit=400),
    lower=best-log(margin), upper=best+log(margin))

  # Output the fitted datapoints
  fitData <- as.data.frame(matrix(NA, nrow=maxLen, ncol=3*length(nicerData)))
  KEnvir <- constructKEnvir(X=nicerData[[1]][,1], KType=KType, hypers=hypersForSE(
      logEll=opt$par["logEll"], logSigmaFSq=opt$par["logSigmaFSq"]+logSigmaScales[1], logSigmaNSq=opt$par["logSigmaNSq"]+logSigmaScales[1]))
  for (K in 1:length(nicerData)) {
    J <- 3*K+c(-1,0)-1
    fitData[1:length(nicerData[[K]][,2]),3*K] <- (smartKnnKInv(KEnvir=KEnvir, X=nicerData[[K]][,1], hypers=hypersForSE(
      logEll=opt$par["logEll"], logSigmaFSq=opt$par["logSigmaFSq"]+logSigmaScales[1], logSigmaNSq=opt$par["logSigmaNSq"]+logSigmaScales[1])) %*% nicerData[[K]][,2])^2
    nicerData[[K]][,2] <- nicerData[[K]][,2]^2
    fitData[1:length(nicerData[[K]][,2]),J] <- nicerData[[K]]
    names(fitData)[J] <- names(nicerData[[K]])
    names(fitData)[3*K] <- paste(sep='', names(fitData)[3*K-1], "_FIT")
  }
  write.table(sep="\t", row.names=F, quote=F, fitData, file="VictorResults.txt")

  return (opt$par)
}

readVictorsFiles <- function() {
  VicData <<- vector("list", length=11)
  VicData[[1]] <<- read.table("STO.dat")
  names(VicData[[1]]) <<- paste(sep='', "STO_", c("X", "Y"))
  VicData[[2]] <<- read.table("BTZOx00.dat")
  names(VicData[[2]]) <<- paste(sep='', "BTZOx00_", c("X", "Y"))
  VicData[[3]] <<- read.table("BTZOx02.dat")
  names(VicData[[3]]) <<- paste(sep='', "BTZOx02_", c("X", "Y"))
  VicData[[4]] <<- read.table("BTZOx04.dat")
  names(VicData[[4]]) <<- paste(sep='', "BTZOx04_", c("X", "Y"))
  VicData[[5]] <<- read.table("BTZOx12.dat")
  names(VicData[[5]]) <<- paste(sep='', "BTZOx12_", c("X", "Y"))
  VicData[[6]] <<- read.table("BTZOx30.dat")
  names(VicData[[6]]) <<- paste(sep='', "BTZOx30_", c("X", "Y"))
  VicData[[7]] <<- read.table("BTZOx70.dat")
  names(VicData[[7]]) <<- paste(sep='', "BTZOx70_", c("X", "Y"))
  VicData[[8]] <<- read.table("BTZOx90.dat")
  names(VicData[[8]]) <<- paste(sep='', "BTZOx90_", c("X", "Y"))
  VicData[[9]] <<- read.table("BTZOx95.dat")
  names(VicData[[9]]) <<- paste(sep='', "BTZOx95_", c("X", "Y"))
  VicData[[10]] <<- read.table("BTZOx100.dat")
  names(VicData[[10]]) <<- paste(sep='', "BTZOx100_", c("X", "Y"))
  VicData[[11]] <<- read.table("BTZOx100A.dat")
  names(VicData[[11]]) <<- paste(sep='', "BTZOx100A_", c("X", "Y"))
}

plotEffectiveEll <- function() {
  n <- 3 # 3 Gaussian peaks
  A <- runif(n=n, min=0.02, max=0.05)
  sigma <- runif(n=n, min=0.01, max=0.1)
  Cee <- runif(n=n, min=0, max=1)
  N <- 1001 # 1000 points
  X <- seq(from=0, to=1, length.out=N)
  fac1 <- fac2 <- expos <- matrix(NA, nrow=N, ncol=n)
  for (i in 1:n) {
    expos[,i] <- A[i] * exp(-0.5*((X-Cee[i])/sigma[i])^2)
    fac1[,i] <- -(X-Cee[i])/sigma[i]^2
    fac2[,i] <- (1/sigma[i]^2) * (((X-Cee[i])/sigma[i])^2 - 1)
  }
  ell0 <- 0.2 + expos %*% rep(1,n)
  ell1 <- (expos * fac1) %*% rep(1,n)
  ell2 <- (expos * fac2) %*% rep(1,n)

  # Or, use a polynomial
  a0 <- 1
  a1 <- 1.2
  a2 <- 3
  ell0 <- a0 + a1*X + a2*X*X
  ell1 <-      a1   + a2*X*0.5
  ell2 <-             a2*0.5          +0*X

# # Numerical difference approximations
# ell1 <- diff(ell0)/diff(X)
# X1 <- 0.5*(X[-1]+X[-N])
# ell2 <- diff(ell1)/diff(X1)
# X2 <- 0.5*(X1[-1]+X1[-(N-1)])

  ellF <- 2*ell0/sqrt(4 + ell1*ell1 - ell0*ell2)
  ylims <- range(c(0,ell0,ellF))
  plot(ylim=ylims, type="l", col="black", X, ell0)
  points(type="l", col="red", X, ellF)
}

# Given a set of "best" params, calculates everything we could want for the
# full-scale Poisson-noised data Yp.
# 1. The noise-free covariance matrix K (plots as a PNG)
# 2. The noise-free correlation matrix C (plots as a PNG)
# 3. The matrix M (where mu = M y)
# 4. The Cholesky-decomposed covariance matrix L (where sd = L 1)
# 5. odd/even test/train figures
full.scale.xray.denoise <- function(X, Yp, bgr.params, peak.params, file.name=sprintf("fullScaleXrayResults_%s.ROBJ", quickTimestamp())) {
  library("png")
  K.nn <- GramMatrixSE(X=X, hypers=hypersForSE(ell=bgr.params$ell, sigmaFSq=bgr.params$sigmaFSq))
  bin.w <- 10
  for (i in 1:nrow(peak.params)) {
    pp <- peak.params[i,]
    # Cutoff at +- s(4), because this is indistinguishable from zero
    CUT <- 5
    J <- indicesInRange(v=X, Min=pp["xL"]-CUT*pp["ell"], Max=pp["xR"]+CUT*pp["ell"])
    hypers <- hypersForSE(ell=pp["ell"], sigmaFSq=pp["sigmaFSq"])
    K.local <- GramMatrixSE(X=X[J], hypers=hypers)
    mask.vals <- 0.5*(s((X[J] - pp["xL"])/pp["ell"]) - s((X[J] - pp["xR"])/pp["ell"]) )
    mask <- outer(mask.vals, mask.vals)
    K.nn[J,J] <- K.nn[J,J] + mask * K.local
  }
  # 1. K.nn
  save(K.nn, file=file.name)
  # 2. visualizing the matrices
  PNG_output(myMatrix=bin.matrix(K.nn, bin.w), filename="Knn.png", highlight=T)
  K.diag <- sqrt(diag(K.nn))
  K.covar <- outer(K.diag, K.diag)
  PNG_output(myMatrix=bin.matrix(K.nn/K.covar, bin.w), filename="Cnn.png", highlight=T, autoscale=F, base=0, range=1)

  # 3. M
  M <- K.nn %*% solve(K.nn + 0.25*diag(length(X)))
  save(K.nn, M, file=file.name)

  # 4. sigma
  Cov <- (diag(length(X)) - M) %*% K.nn
  sigma <- sqrt(diag(Cov))
  save(K.nn, M, sigma, file=file.name)

  # 5. L
  L <- t(carefulChol(Cov))
  save(K.nn, M, L, sigma, file=file.name)
}

read.exchangeable.datasets <- function(dir.name, base.name, N=79) {
  file.name <- function(i) sprintf("%s/%s_%02d.txt", dir.name, base.name, i)
  get.data <- function(i) as.matrix(read.table(file.name(i),skip=1,col.names=paste(c("Q","Yp"),i,sep='')))
  data.1 <- get.data(1)
  all.data <- matrix(NA, nrow=nrow(data.1), ncol=2*N)
  all.data[,1:2] <- data.1
  colnames(all.data) <- rep(colnames(data.1),N)
  for (i in 2:N) {
    dataset <- get.data(i)
    all.data[,2*i-c(1,0)] <- dataset
    colnames(all.data)[2*i-c(1,0)] <- colnames(dataset)
  }

  # Check that the 2*theta-vals are all the same
  odd <- seq(from=1, to=2*N, by=2)
  unit <- min(diff(all.data[,1]))
  for (i in 1:nrow(all.data)) {
    Q.range <- diff(range(all.data[i,odd]))/unit
    if (Q.range/unit > 1e-3) {
      cat("Index %d exhibits range %f, compared to %f!\n", i, Q.range, unit)
    }
  }

  # Trim redundant columns before returning data
  return (as.data.frame(all.data[,-odd[-1]]))
}

check.datasets.exchangeable <- function(myData, label="DEFAULT", make.fat.img=FALSE) {
  num.sets <- ncol(myData)-1
  # Make an image showing the pixel-by-pixel ranking of the datasets
  myImg <- matrix(NA, ncol=nrow(myData), nrow=num.sets)
  if (make.fat.img) {
    pb <- txtProgressBar(min = 0, max = nrow(myData), style = 3)
    for (px in 1:nrow(myData)) {
      myImg[,px] <- rank(myData[px,-1], ties.method="average")-1
      setTxtProgressBar(pb, px)
    }
    close(pb);
    PNG_output(myMatrix=myImg, filename=sprintf("check_exch_%s.png", label), highlight=T, autoscale=FALSE, base=0, range=num.sets-1)
  }

  # Flag which datasets are in the "extreme" (i.e. either 'top' or 'bottom')
  # 'n', for this pixel

  # First: prepare by calculating the ranking matrices
  make.it.fast <- function() {
    cat("Now, try summing them:\n")
    pb <- txtProgressBar(min = 0, max = nrow(myData), style = 3)
    minRank <<- maxRank <<- 0*myImg
    for (px in 1:nrow(myData)) {
      maxRank[,px] <<- rank(myData[px,-1], ties.method="max")
      minRank[,px] <<- rank(myData[px,-1], ties.method="min")
      setTxtProgressBar(pb, px)
    }
    close(pb);
  }
  make.it.fast()

  # Given this matrix: how would we compute whether we're in the top n?
  extreme.n <- function(n, take.top=TRUE) {
    if (take.top) {
      MIN <- minRank; MAX <- maxRank
    } else {
      MIN <- 1 + num.sets - maxRank
      MAX <- 1 + num.sets - minRank
    }
    made.the.cut <- 0*minRank
    for (px in 1:nrow(myData)) {
      for.sure <- which(MAX[,px] <= n)
      made.the.cut[for.sure,px] <- 1
      # Are there any "borderline" ones?  Partially include them too.
      if (length(for.sure) < n) {
        borderlines <- which (MAX[,px] > n & MIN[,px] <= n)
        made.the.cut[borderlines,px] <- (n + 1 - MIN[borderlines[1],px])/length(borderlines)
      }
    }
    return (rowMeans(made.the.cut))
  }

  for (n in 1:5) {
    top.n <- extreme.n(n=n, take.top=TRUE)
    bot.n <- extreme.n(n=n, take.top=FALSE)
    png(filename=sprintf("extreme-%02d_%s.png", n, label), width=900, height=500)
    y.range <- range(c(0,top.n,bot.n))
    plot(main=sprintf("Which datasets are in the extreme %d? (%s)", n, label), xlab="Dataset number", ylab=sprintf("Fraction of pixels in (top/bottom) %d", n), ylim=y.range,
      col="red", top.n)
    points(col="blue", bot.n)
    legend(x="topleft",
      legend=paste("In", c("Top", "Bottom"), n),
      pch=rep(1,2),
      col=c("red", "blue"))
    dev.off()
  }
}

true.function.factor <- function(Q, y.short, y.long, bounds) {
  Yg.short <- PoissonToGauss(y.short)
  Yg.long <- PoissonToGauss(y.long)
  score <- function(f) MSR(Yg.short, sqrt(f)*Yg.long)
  opt <- optimize(f=score, interval=bounds)
  return (opt$minimum)
}

Xray.evn.odd <- function(X, Yp, bgr.params, peak.params, file.with.huge.matrices) {
  load(file.with.huge.matrices)
  bgr.hypers <- hypersForSE(ell=bgr.params$ell, sigmaFSq=bgr.params$sigmaFSq)
  peak.hypers <- list(logSigmaNSq=0, contribs=peak.params)
  hypers <- list(logSigmaNSq=log(0.25), contribs=list( list(KType="SE", hypers=bgr.hypers), list(KType="MaskedRegions", hypers=peak.hypers)))
  resids <- testTrainEvenOdd(X=X, Y=Yp, hypers=hypers, KType="Sum", K.nn=K.nn)
  save(resids, file="XRAY.resids.evn.odd")
  return (resids)
}

# Trying to adapt ideas of Matthew J Guy, "Fourier block noise reduction: a
# filter for reducing Poisson noise" to 1D case (he used 2D)
FBNR.1D <- function(X=1:length(Y), Y, block.w=16) {
  counts <- cumul <- 0*Y
  N <- length(Y)

  # Helper functions
  filter.out.highest.n.freqs <- function(f, n) {
    g <- f
    II <- c(1:(n+1),(N-n+1):N)
    g[-II] <- 0
    return (Re(fft(g, inverse=TRUE))/block.w)
  }

  for (i in 0:(N-block.w)) {
    JJ <- i + 1:block.w
    counts[JJ] <- counts[JJ] + 1
    var.noise <- mean(Y[JJ]) # Because the noise is Poisson
    var.total <- var(Y[JJ])
    f <- fft(Y[JJ])
    keep <- floor(0.5*block.w)-1
    Yf <- filter.out.highest.n.freqs(f, keep)
    while (keep>0 && var(Yf) > var.total - var.noise) {
      keep <- keep-1
      Yf <- filter.out.highest.n.freqs(f, keep)
    }
    cumul[JJ] <- cumul[JJ] + Yf
    done <- which(counts>0)
  }
  plot(X, Y, type="l", col="gray", main="Test 1D FBNR")
  points(X[done], cumul[done]/counts[done], type="l", col="black")
  done <- which(counts>0)
  return (cumul[done]/counts[done])
}

sigmoid.ff <- function(nu, f, w) 0.5*(1-s((nu-f)/w))
Fourier.low.pass.filter <- function(Y, f, w) {
  N <- length(Y)
  M <- ceiling(N/2)-1
  frac <- seq(from=0, to=1, length.out=M)
  piece <- sigmoid.ff(nu=frac, f=f, w=w)
  Nyquist <- sigmoid.ff(nu=1+(1/M), f=f, w=w)
  if (N%%2>0) Nyquist <- c()
  freq.filt <- c(1,piece,Nyquist,rev(piece))
  return (Re(fft(inverse=TRUE, freq.filt*fft(Y))/N))
}

# Find the Fourier parameters which minimize the MSR between Y and Y.comp.  By
# Fourier parameters, I mean:
# - "f": the mean cutoff frequency (low-pass)
# - "w": the 'transition width' of the frequency cutoff
best.Fourier.params <- function(Y, Y.comp) {
  # Setup the variables and functions
  score <- function(params) {
    f <- params["f"]
    w <- params["w"]
    cat(sprintf("I am trying (f,w)=(%f,%f).\n", f, w))
    Y.filtered <- Fourier.low.pass.filter(Y=Y, f=f, w=w)
    mean.resid <- MSR(Y.filtered, Y.comp)
    #plot(main=sprintf("Fourier denoising: cutoff f=%f, w=%f", params["f"], params["w"]), Y, type="l", col="gray")
    #points(type="l", col="black", Y.comp)
    #points(type="l", col="purple", Y.filtered)
    #cat(sprintf("MSR for (f,w) = (%8.6f,%8.6f): %10.6f\n", params["f"], params["w"], mean.resid))
    cat(sprintf("I am DONE trying (f,w)=(%f,%f).\n", f, w))
    return (mean.resid)
  }

  # Now, do the optimization
  upper.bnds <- c(1.0, 0.30)
  lower.bnds <- c(0.0, 1e-7)
  start.pars <- c(0.9, 0.01)
  names(upper.bnds) <- names(lower.bnds) <- names(start.pars) <- c("f","w")
  opt <- optim(par=start.pars, fn=score, method="L-BFGS-B",
    control=list(fnscale=1, trace=0, maxit=400),
    lower=lower.bnds, upper=upper.bnds)

  return (opt)
}

best.FBNR <- function(Yp, Y.true, n.max) {
  score <<- (1:n.max) * 0
  Y.true.g <- PoissonToGauss(Y.true)
  for (i in 1:n.max) {
    Y.new <- FBNR.1D(Y=Yp, block.w=i)
    score[i] <<- MSR(Y.true.g, PoissonToGauss(Y.new))
    cat(sprintf("Score for %3d: %9.6f\n", i, score[i]))
  }
  return (score)
}

Fourier.compare <- function(Y.noisy, Y.true, Y.filtered=NA, omit.first=TRUE) {
  F.noisy <- fft(Y.noisy)
  I.noisy <-  Im(F.noisy)
  R.noisy <-  Re(F.noisy)
  M.noisy <- abs(F.noisy)
  F.true <- fft(Y.true)
  I.true <-  Im(F.true)
  R.true <-  Re(F.true)
  M.true <- abs(F.true)
  M.filtered <- 0
  if (!is.na(Y.filtered)) {
    F.filtered <- fft(Y.filtered)
    I.filtered <-  Im(F.filtered)
    R.filtered <-  Re(F.filtered)
    M.filtered <- abs(F.filtered)
  }

  type <- "l"
  R.lim <- range(c(R.noisy[-1], R.true[-1]))
  I.lim <- range(c(I.noisy[-1], I.true[-1]))
  M.lim <- range(c(M.noisy[-1], M.true[-1], M.filtered[-1]))
  M.lim <- range(c(M.noisy[-1], M.true[-1]))
  plot(type=type, main="Re[FFT] comparison", xlab="Fourier index", ylab="Amplitude", R.noisy, col="gray", ylim=R.lim); abline(h=0)
  points(type=type, col="black", R.true)

  plot(type=type, main="Im[FFT] comparison", xlab="Fourier index", ylab="Amplitude", I.noisy, col="gray", ylim=I.lim); abline(h=0)
  points(type=type, col="black", I.true)

# ratio <- F.true/F.noisy
# plot(type=type, main="Filter ratio: Re[FFT]", xlab="Fourier index", ylab="Ratio: pure to noisy", abs(ratio), col="black", ylim=c(0,1));abline(h=0)
# for (JJJ in 2:6)
#   points(lwd=2, type=type, abs(quickGaussianBlur(ratio, constSigma=JJJ*5)), col=JJJ, ylim=c(0,1))

  plot(type=type, main="Mod[FFT] comparison", xlab="Fourier index", ylab="Amplitude", M.noisy, col="gray", ylim=M.lim, log="y")
  points(type=type, col="black", M.true)
  #if (!is.na(Y.filtered)) points(type=type, col="blue", M.filtered)
}

denoise.wavelets <- function(X=1:length(Yp), Yp, Y.comp=NA, ...) {
  library(msProcess)
  N <- length(Yp)
  have.Y.comp <- !is.na(head(Y.comp,1))
  Yg <- PoissonToGauss(Yp)

  # First: denoise the data (both as-is and Anscombe'd)
  Yp.dn.wav <- msDenoiseWavelet(x=Yp, ...)
  Yg.dn.wav <- PoissonToGauss(Yp.dn.wav)
  Yg.dn.wav.Ans <- msDenoiseWavelet(x=Yg, ...)
  Yp.dn.wav.Ans <- GaussToPoisson(Yg.dn.wav.Ans)

  # Now: plot the results
  data.labels <- c("Noisy data", "Wavelet", "Wavelet plus anscombe", "True signal")
  data.colors <- c("gray", "red", "blue", "black")
  plot(main="Wavelet denoising", xlab="", ylab="Number of counts", type="l", col=data.colors[1], Yp)
  II <- 1:3
  if (have.Y.comp) {
    points(type="l", col=data.colors[4], Y.comp)
    II <- c(II,4)
    Yg.comp <- PoissonToGauss(Y.comp)
    cat(sprintf("MSR: (Regular, Anscombe) = (%9.7f, %9.7f)\n",
        MSR(Yg.comp, Yg.dn.wav), MSR(Yg.comp, Yg.dn.wav.Ans)))
  }
  n <- length(II)
  points(type="l", col=data.colors[2], Yp.dn.wav)
  points(type="l", col=data.colors[3], Yp.dn.wav.Ans)
  legend(x="topright", legend=data.labels[II], lty=rep(1, n), pch=rep(-1, n), col=data.colors[II])

  # Write the data for gnuplotting, and the MSR as well
  d.f <- data.frame(Q=X, noisy=Yp, wavelet=Yp.dn.wav, waveletAnscombe=Yp.dn.wav.Ans)
  if (have.Y.comp)
    d.f <- cbind(d.f, data.frame(true=Y.comp))
  write.table(file="denoised_wavelets", sep="\t", row.names=FALSE, quote=FALSE, d.f)

  return (list(regular=Yp.dn.wav, Anscombe=Yg.dn.wav.Ans))
}

wavelet.MSR <- function(Yp, Y.comp, ...) {
  Yg <- PoissonToGauss(Yp)
  Y.comp.g <- PoissonToGauss(Y.comp)
  return (MSR(msDenoiseWavelet(x=Yg, ...), Y.comp.g))
}

test.wavelet.MSR <- function(Yp, Y.comp, ...) {
  my.noise.var <- seq(from=0.01, to=0.39, by=0.002)
  my.MSR <- my.noise.var
  for (i in 1:length(my.MSR)) {
    my.MSR[i] <- wavelet.MSR(Yp, Y.comp, noise.variance=my.noise.var[i], ...) 
  }
  plot(main="Denoising residuals, vs. supplied noise variance", xlab="Supplied noise variance", ylab="MSR", my.noise.var, my.MSR, type="l", ylim=c(0, 0.007))
  abline(col="blue", v=0.25)
  abline(col="green", h=wavelet.MSR(Yp, Y.comp, ...))
  legend(x="bottomright", legend=c("MSR with defaults", "Actual noise variance"), lty=rep(1,2), pch=rep(-1,2), col=c("green", "blue"))
  return (data.frame(noise.var=my.noise.var, MSR=my.MSR))
}

check.thresh.scale <- function(Yp, Y.comp, ...) {
  N <- 17
  my.thresh <- exp(seq(from=log(0.5), to=log(2), length.out=N))
  DF <- test.wavelet.MSR(Yp, Y.comp, thresh.scale=my.thresh[1], ...)
  vals <- matrix(ncol=N, nrow=length(DF$MSR))
  vals[,1] <- DF$MSR
  for (i in 2:N) {
    vals[,i] <- test.wavelet.MSR(Yp, Y.comp, thresh.scale=my.thresh[i], ...)$MSR
  }
  myImagePlot(vals)
  best.element <- which(vals==min(vals), arr.ind=TRUE)
  cat(sprintf("We got as low as %f! (noise.var, thresh.scale) = (%f, %f)\n", min(vals), DF$noise.var[best.element[1,1]], my.thresh[best.element[1,2]]))
  return (vals)
}

check.wavelets <- function(Yp, Y.comp, ...) {
  s.nums <- seq(from=6, to=18, by=2)
  s.names <- paste(sep='', 's', s.nums)
  M <- length(s.names)
  N <- 17
  my.thresh <- exp(seq(from=log(0.5), to=log(2), length.out=N))
  vals <- matrix(0, nrow=M, ncol=N)
  for (i in 1:M) {
    for (j in 1:N) {
      vals[i,j] <- wavelet.MSR(Yp, Y.comp, wavelet=s.names[i], thresh.scale=my.thresh[j], ...)
    }
  }
  myImagePlot(vals)
  best.element <- which(vals==min(vals), arr.ind=TRUE)
  cat(sprintf("We got as low as %f! (wavelet, thresh.scale) = (%s, %f)\n", min(vals), s.names[best.element[1,1]], my.thresh[best.element[1,2]]))
  return (vals)
}

residuals.simple.1D <- function(X, Yp, Y.comp=Yp, KType, hypers, KEnvir=constructKEnvir(X=X, KType=KType, hypers=hypers)) {
  cat("Later I'll do this for real!  For now, it's time-saving ad-hoc-ness for me.\n")
}

residuals.simple.1D.oneshot <- function() {

}

Bayes.draws.residuals.Xray <- function(N=1000, L, M, Y.noise, Y.true.enough) {
  Yg.true <- PoissonToGauss(Y.true.enough)
  Yg.noise <- PoissonToGauss(Y.noise)
  Ug <- M%*%Yg.noise
  resids.noise <- resids.true <- 0*(1:N)
  pb <- txtProgressBar(min = 0, max = N, style = 3)
  for(i in 1:N) {
    setTxtProgressBar(pb, i)
    U.rand <- Ug + L %*% rnorm(n=length(Y.noise))
    resids.noise[i] <- MSR(U.rand, Yg.noise)
    resids.true[i] <- MSR(U.rand, Yg.true)
  }
  close(pb);
  x <- data.frame(noisy=resids.noise, true=resids.true)
  write.table(sep="\t", row.names=FALSE, quote=FALSE, signif(x, 5),
    file=sprintf("Bayes_draws_resids_%s", quickTimestamp()))
  return (x)
}

single.curve.resids.Xray <- function(Bayes.g, Y.noise, Y.true.enough) {
  library(msProcess)
  Yg.true <- PoissonToGauss(Y.true.enough)
  Yg.noise <- PoissonToGauss(Y.noise)

  # Calculate the approximations
  AWS.g          <- PoissonToGauss(lpaws(Y.noise , ladjust=2)@theta[,1])
  AWS.Anscombe.g <- lpaws(Yg.noise, ladjust=1)@theta[,1]
  cat("\n")
  Wavelets.g <- msDenoiseWavelet(x=Yg.noise, noise.variance=0.25)

  # Setup the matrix
  resids <- matrix(NA, nrow=4, ncol=2)
  colnames(resids) <- c("true", "noise")
  rownames(resids) <- c("Bayes", "AWS", "AWS.Anscombe", "Wavelets")

  # Fill it in with the residuals
  resids["Bayes", "true"] <- MSR(Bayes.g, Yg.true)
  resids["Bayes", "noise"] <- MSR(Bayes.g, Yg.noise)
  resids["AWS", "true"] <- MSR(AWS.g, Yg.true)
  resids["AWS", "noise"] <- MSR(AWS.g, Yg.noise)
  resids["AWS.Anscombe", "true"] <- MSR(AWS.Anscombe.g, Yg.true)
  resids["AWS.Anscombe", "noise"] <- MSR(AWS.Anscombe.g, Yg.noise)
  resids["Wavelets", "true"] <- MSR(Wavelets.g, Yg.true)
  resids["Wavelets", "noise"] <- MSR(Wavelets.g, Yg.noise)

  return (as.data.frame(resids))
}

# Average a whole bunch of randomly drawn functions, empirically.
empirical.error.bounds <- function(KEnvir.fname="KEnvir.saved.20110719_125210", X, Y.p, N.draws=3e3) {
  N <- length(X)
  load(KEnvir.fname)
  U.g <- smartKnnKInv(KEnvir=KEnvir) %*% PoissonToGauss(Y.p)
  U.p <- GaussToPoisson(U.g)
  L <- smartL(KEnvir=KEnvir)
  is.it.easy <- sqrt(diag(L %*% t(L)))
  U.p1s <- GaussToPoisson(U.g + is.it.easy)
  U.m1s <- GaussToPoisson(U.g - is.it.easy)
  write.table(data.frame(Q=X, Y.noisy=Y.p, U.p=U.p, U.p1s=U.p1s, U.m1s=U.m1s),
    file="forehead.slap.2011-07-20", sep="\t", quote=FALSE, row.names=FALSE)
  rm(KEnvir)

  draws.matrix <- matrix(NA, nrow=N.draws, ncol=N)
  cat("Generating random draws...\n")
  pb <- txtProgressBar(min = 0, max = N.draws, style = 3)
  for (draw in 1:N.draws) {
    setTxtProgressBar(pb, draw)
    U.rand.g <- U.g + L %*% rnorm(n=N)
    draws.matrix[draw,] <- GaussToPoisson(U.rand.g)
  }
  close(pb);

  cat("Tallying statistics...\n")
  emp.mean <- emp.mp1s <- emp.mm1s <- 1:N
  for (pixel in 1:N) {
    emp.mean[pixel] <- mean(draws.matrix[,pixel])
    std.dev <- sd(draws.matrix[,pixel])
    emp.mp1s[pixel] <- emp.mean[pixel] + std.dev
    emp.mm1s[pixel] <- emp.mean[pixel] - std.dev
  }

  results <- data.frame(Q=X, Y.noisy=Y.p, U.p=U.p, 
    U.emp=emp.mean, U.emp.p1s=emp.mp1s, U.emp.m1s=emp.mm1s)
  save(results, file="Empirical_lifeboat.ROBJ")

  write.table(results, file=sprintf("Empirical_uncertainty_%s", quickTimestamp()),
    sep="\t", row.names=FALSE, quote=FALSE)

  return (results)
}

pure.background <- function(X, Y.p, bgr.params, peak.params) {
  Y.g <- PoissonToGauss(Y.p)
  K.bgr <- GramMatrixSE(X=X, hypers=hypersForSE(ell=bgr.params$ell, sigmaFSq=bgr.params$sigmaFSq))
  save(K.bgr, file="bgr.objects/K.bgr.ROBJ")
  K.peaks <- 0*K.bgr
  for (i in 1:nrow(peak.params)) {
    pp <- peak.params[i,]
    # Cutoff at +- s(4), because this is close enough to zero
    CUT <- 5
    J <- indicesInRange(v=X, Min=pp["xL"]-CUT*pp["ell"], Max=pp["xR"]+CUT*pp["ell"])
    hypers <- hypersForSE(ell=pp["ell"], sigmaFSq=pp["sigmaFSq"])
    K.local <- GramMatrixSE(X=X[J], hypers=hypers)
    mask.vals <- 0.5*(s((X[J] - pp["xL"])/pp["ell"]) - s((X[J] - pp["xR"])/pp["ell"]) )
    mask <- outer(mask.vals, mask.vals)
    K.peaks[J,J] <- K.peaks[J,J] + mask * K.local
  }
  save(K.peaks, file="bgr.objects/K.peaks.ROBJ")
  K.noise <- 0.25*diag(nrow(K.bgr))
  K.inv <- solve(K.bgr + K.peaks + K.noise) 
  save(K.inv, file="bgr.objects/K.inv.ROBJ")
}

pure.background.continued <- function(X, Y.p) {
  TS <- quickTimestamp()
  load("bgr.objects/K.inv.ROBJ")

  curves <- data.frame(Q=X, Y.noisy=Y.p)

  # Background info
  load("bgr.objects/K.bgr.ROBJ")
  K.bgr.inv <- K.bgr %*% K.inv
  Y.bgr.g <- K.bgr.inv %*% PoissonToGauss(Y.p)
  Y.bgr.p <- GaussToPoisson(Y.bgr.g)
  Cov.bgr <- (diag(nrow(K.bgr)) - K.bgr.inv) %*% K.bgr
  Y.sd.g <- sqrt(diag(Cov.bgr))
  Y.bgr.p.p1s <- GaussToPoisson(Y.bgr.g + Y.sd.g)
  Y.bgr.p.m1s <- GaussToPoisson(Y.bgr.g - Y.sd.g)
  save(Cov.bgr, file="bgr.objects/Cov.bgr.ROBJ")
  rm(Cov.bgr, K.bgr.inv, K.bgr)
  curves <- cbind(curves, data.frame(
      Y.bgr.g=Y.bgr.g, Y.bgr.p=Y.bgr.p, Y.bgr.p.p1s=Y.bgr.p.p1s, Y.bgr.p.m1s=Y.bgr.p.m1s))
  write.table(file=sprintf("separate_contributions_%s.dat", TS), curves,
    sep="\t", row.names=FALSE, quote=FALSE)

  # Peak info
  load("bgr.objects/K.peaks.ROBJ")
  K.peaks.inv <- K.peaks %*% K.inv
  Y.peaks.g <- K.peaks.inv %*% PoissonToGauss(Y.p)
  Y.peaks.p <- GaussToPoisson(Y.peaks.g)
  Cov.peaks <- (diag(nrow(K.peaks)) - K.peaks.inv) %*% K.peaks
  Y.sd.g <- sqrt(diag(Cov.peaks))
  Y.peaks.p.p1s <- GaussToPoisson(Y.peaks.g + Y.sd.g)
  Y.peaks.p.m1s <- GaussToPoisson(Y.peaks.g - Y.sd.g)
  save(Cov.peaks, file="bgr.objects/Cov.peaks.ROBJ")
  rm(Cov.peaks, K.peaks.inv, K.peaks)
  curves <- cbind(curves, data.frame(
      Y.peaks.g=Y.peaks.g, Y.peaks.p=Y.peaks.p, Y.peaks.p.p1s=Y.peaks.p.p1s, Y.peaks.p.m1s=Y.peaks.p.m1s))
  write.table(file=sprintf("separate_contributions_%s.dat", TS), curves,
    sep="\t", row.names=FALSE, quote=FALSE)

}
