These notes are structured as an IPython/Jupyter notebook tutorial written in Python 3, and depend only on the numpy and scipy libraries. [download as: .ipynb , .html , .pdf ]

Applying Gaussian process models to hippocampal grid cell data

Gaussian Processes (GPs) generalize the idea of multivariate Gaussian distributions to distributions over functions. In neuroscience, they can be used to estimate how the firing rate of a neuron varies as a function of other variables (e.g. to track retinal waves). Lately, we've been using Gaussian processes to describe the firing rate map of hippocampal grid cells.

We review Bayesian inference and Gaussian processes, explore applications of Gaussian processes to analyzing grid cell data, and finally construct a GP model of the log-rate that accounts for the Poisson noise in spike count data. Along the way, we discuss fast approximations for these methods, like kernel density estimation, or approximating GP inference using convolutions.

Introduction

First, we briefly review Bayesian inference for multivariate Gaussian variables and Gaussian processes. Then, we construct some synthetic spike-count observations, similar to what one might see in hippocampal grid cells. We then review how to estimate the underlying firing rate map using kernel density estimation, and discuss some regularization choices when data are limited.

Bayesian inference in multivariate Gaussian distributions

Loosely, Gaussian processes can be viewed as "really big" multivariate Gaussian distributions, with infinitely many variables. It's helpful to review Bayesian inference for multivariate Gaussian variables before continuing.

Consider estimating some jointly Gaussian variables $z$ from observations $y$. Bayes' rule states that the posterior distribution $\Pr(z|y)$ is proportional to our prior, $\Pr(z)$, times the likelihood of observing $y$ given $z$, $\Pr(y|z)$:

\begin{equation} \Pr(z|y) \propto \Pr(y|z) \Pr(z). \end{equation}

Consider a case where both $\Pr(y|z)$ and $\Pr(z)$ are Gaussian:

\begin{equation} \begin{aligned} \Pr(z) &= \mathcal N( \mu_0, \Sigma_0 ) \\ \Pr(y|z) &= \mathcal N( z, \Sigma_\epsilon ) \end{aligned} \end{equation}

We can estimate $\Pr(z|y)$ using Bayes' rule, by multiplying these two probability distributions (and normalizing the result to integrate to one).

\begin{equation} \begin{aligned} \Pr(y|z) \Pr(z) &\propto \exp\left[-\tfrac 1 2 (z-\mu_0)^\top\Sigma_0^{-1}(z-\mu_0)\right] \exp\left[-\tfrac 1 2 (z-y)^\top\Sigma_\epsilon^{-1}(z-y)\right] \\&\propto \exp\left\{-\tfrac 1 2 \left[ z^\top\Sigma_0^{-1}z -2z^\top\Sigma_0^{-1}\mu_0 + z^\top\Sigma_\epsilon^{-1}z -2z^\top\Sigma_\epsilon^{-1}y \right] \right\} \\&= \exp\left\{-\tfrac 1 2 \left[ z^\top(\Sigma_0^{-1}+\Sigma_\epsilon^{-1})z -2z^\top(\Sigma_0^{-1}\mu_0+\Sigma_\epsilon^{-1}y) \right] \right\} \\&= \exp\left\{-\tfrac 1 2 \left[ z^\top\Sigma^{-1}z -2z^\top\Sigma^{-1}\mu \right] \right\} \\&\propto \exp\left\{-\tfrac 1 2 (z-\mu)^\top\Sigma^{-1}(z-\mu) \right\} \end{aligned} \end{equation}

This product of two multivariate Gaussian distributions is also a multivariate Gaussian distribution, $\hat z \sim \mathcal N(\mu, \Sigma)$, with covariance and mean:

\begin{equation} \begin{aligned} \Sigma &= \left[\Sigma_0^{-1} + \Sigma_\epsilon^{-1} \right]^{-1} \\ \mu &= \Sigma \left[\Sigma_0^{-1}\mu_0 + \Sigma_\epsilon^{-1}y \right] \end{aligned} \end{equation}

In other textbooks or tutorials, you might also see this written as

\begin{equation} \begin{aligned} \Sigma &= \Sigma_0 - \Sigma_0[\Sigma_0 + \Sigma_\epsilon]^{-1} \Sigma_0 \\ \mu &= \mu_0 + \Sigma_0\left[\Sigma_\epsilon + \Sigma_0 \right]^{-1}(y-\mu_0). \\ \end{aligned} \end{equation}

Both forms are equivalent, and are related to each other by applying the Sherman–Morrison–Woodbury matrix inversion lemma.

Gaussian process regression

Gaussian processes are commonly used to estimate a smooth underlying trend from noisy observations. Peter Roelants' notes on Gaussian processes are a clear and detailed introduction.

Consider a GP regression problem for learning $y=f(\mathbf x)$, where $\mathbf x=\{x_1,x_2\}$ are coordinates in 2D. Here, our prior over functions is specified not by a mean and covariance, but by a mean function $m(\mathbf x)$ and a two-point correlation function $\kappa(\mathbf x,\mathbf x')$, called a kernel. These functions accept a set of points as input and return a mean vector and covariance matrix evaluated at those points.

For the regression problem, we'd like to learn a model of $y=f(\mathbf x)$ given some initial observations $\text Y_1=\{y_{1,1},..,y_{1,n}\}$ at locations $\text X_1=\{\mathbf x_{1,1},..,\mathbf x_{1,n}\}$.

GP regression builds a posterior distribution over possible functions $f(\mathbf x)$, given our prior (mean and kernel), and these observations.

For any finite collection of points $\text X_2=\{\mathbf x_{2,1},..,\mathbf x_{2,m}\}$, we can evaluate the GP posterior over the corresponding outputs $y_2 = f(\mathbf x_2)$, $\text Y_2=\{y_{2,1},..,y_{2,m}\}$.

\begin{equation} \begin{aligned} y_2 &\sim \mathcal N(\mu,\Sigma) \\ \mu &= \mu_2 + \Sigma_{12}^\top[\Sigma_{11}+\Sigma_{\epsilon}]^{-1} (y_1 - \mu_1) \\ \Sigma &= \Sigma_{22} - \Sigma_{12}^\top [\Sigma_{11}+\Sigma_{\epsilon}]^{-1} \Sigma_{12}, \end{aligned} \end{equation}

where the means and covariances are computed according to the prior mean and kernel, $\mu_i{=}m(\mathbf x_i)$ and $\Sigma_{ij}{=}\kappa(\mathbf x_i,\mathbf x_j)$, respectively. The observation noise model $\Sigma_{\epsilon}$ is typically assumed to be i.i.d. Gaussian noise with variance $\xi^2$, i.e. $\Sigma_{\epsilon} = \xi^2 I$, although we'll explore some other options here.

To connect GP regression to the Bayesian update for multivariate normal variables, consider sampling both the data and the posterior over the same set of points $\text X_0=\{\mathbf x_1,..,\mathbf x_L\}$, i.e. $\text X_1 = \text X_2 = \text X_0$.

In this case, $\mu_1 = \mu_2 = \mu_0$ and $\Sigma_{11} = \Sigma_{12} = \Sigma_{22} = \Sigma_0$, and the GP regression simplifies to:

\begin{equation} \begin{aligned} \Sigma &= \Sigma_0 - \Sigma_0 [\Sigma_0+\Sigma_{\epsilon}]^{-1} \Sigma_0 \\ \mu &= \mu_0 + \Sigma_0[\Sigma_0+\Sigma_{\epsilon}]^{-1} (y_1 - \mu_0), \end{aligned} \end{equation}

This is identical to the posterior distribution for a multivariate Gaussian model we discussed earlier. Indeed, if the data consist of Gaussian observations over a set of points, and you evaluate the posterior at these same locations, there is no difference between Gaussian Process regression and Bayesian inference using multivariate Gaussian variables.

Exploring Gaussian Process methods in grid cell data

First, let's set up our Python environment in the notebook.
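A minimal version of that setup might look like the following (the exact imports in the original notebook may differ):

```python
# Assumed setup; the original notebook may import additional helpers.
import numpy as np
import scipy.linalg
import scipy.sparse.linalg
import matplotlib.pyplot as plt   # used only for plotting

np.random.seed(0)   # make the synthetic data reproducible
L = 100             # linear size of the L x L spatial grid used below
```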

Simulating some data

Let's generate some fake grid cell data. We'll simulate an $L\times L$ spatial grid, and define a periodic grid-like firing intensity. Real data are always a bit messy, so we'll make the arena irregularly shaped, model some background rate fluctuations, and include non-uniform sampling of the grid (maybe the rat visits some locations more than others).

We also add a bit of zero padding around the data. This allows us to apply convolution kernels using circular convolution, without mixing up data from opposite sides. This will be useful later, since circular convolution can be computed very quickly using the Fast Fourier Transform (FFT). More generally, we might also want to mask out parts of the space if e.g. the rat was exploring an arena with something other than a square shape.

We summarize the data in terms of two $L\times L$ arrays: $N$, which counts the number of times the rat visits each location, and $K$, which counts the total number of spikes observed in each location. Spikes are sampled as a conditionally-Poisson process with rate $\lambda$ equal to the intensity at each location.
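As a rough sketch of this binning step (the function and variable names here are illustrative placeholders, not the notebook's own):

```python
import numpy as np

# Hypothetical binning step: `positions` is a (T,2) array of coordinates
# scaled to [0,1), `spikes` a length-T vector of spike counts per time bin.
def bin_session(positions, spikes, L):
    ix = np.clip((positions * L).astype(int), 0, L - 1)
    N = np.zeros((L, L))   # number of visits to each spatial bin
    K = np.zeros((L, L))   # number of spikes observed in each spatial bin
    np.add.at(N, (ix[:, 0], ix[:, 1]), 1)
    np.add.at(K, (ix[:, 0], ix[:, 1]), spikes)
    return N, K
```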

Let's plot things.

Estimating rate in each bin

The simplest way to estimate the rate at each location is to simply divide the number of observed spikes $K$ by the number of visits $N$ to each location. This is a very noisy estimate (below, left), and is undefined when $N$ is zero.

$$ \hat\lambda = \frac K N $$

It's tempting to add a little ad-hoc regularization to handle the $N=0$ case gracefully, for example $\hat\lambda = {K}/({N+\tfrac 1 2} )$. Tricks like this might seem arbitrary (and perhaps wrong), but can be more formally motivated via Bayesian statistics.

We assume that each time the rat visits a location with intensity $\lambda$, we observe $y$ spikes, which are Poisson distributed:

\begin{equation} \Pr(y|\lambda) = \frac {\lambda^y e^{-\lambda}} {\Gamma(y+1)} \end{equation}

This gives us a likelihood for estimating $\lambda$ given $y$. The gamma distribution is the conjugate prior for Poisson rates, with shape parameter $\alpha$ and rate parameter $\beta$:

\begin{equation} \Pr(\lambda|\alpha,\beta) = \frac{\beta^{\alpha}}{\Gamma(\alpha)} {\lambda^{\alpha-1} e^{-\beta\lambda}}. \end{equation}

We can write the likelihood for the rate $\lambda$ given a single count observation $y$, dropping factors that do not depend on $\lambda$, as:

$$ \Pr(y|\lambda) \propto {\lambda^{y} e^{-\lambda}}. $$

To combine the observations from $N$ visits to a location, $y_t\in\{y_1,..,y_N\}$, take the product of the likelihoods for each observation. This reduces to a simplified expression in terms of the total number of spikes $K=\sum_t y_t$:

\begin{equation} \begin{aligned} \Pr(y_1,..,y_N|\lambda)&\propto \textstyle\prod_t \lambda^{y_t} e^{-\lambda}= \lambda^K e^{-N\lambda}. \end{aligned} \end{equation}

To assign a rate estimate to locations with missing data, we can define a Bayesian prior for $\lambda$. This regularizes bins that have limited data, reducing variance at the expense of increased bias. For regularization strength $\rho>0$, we set $\Pr(\lambda)\sim\operatorname{Gamma}(\alpha_0,\beta_0)$, with $\beta_0=\rho$ and $\alpha_0=\rho(\mu-1)+1$, where $\mu$ is the overall average firing rate of the neuron, regardless of location. This leads to a posterior distribution of:

\begin{equation} \begin{aligned} \Pr(\lambda|y_1,..,y_N) &\propto \lambda^{K}e^{-N\lambda} \cdot [\lambda^{\alpha_0-1} e^{-\beta_0\lambda}] \\&= \lambda^{K+\rho(\mu-1)}e^{-(N+\rho)\lambda}. \end{aligned} \end{equation}

This gives a gamma-distributed posterior with $\alpha = K+\rho(\mu-1)+1$ and $\beta=N+\rho$. The posterior mean, $\alpha/\beta$, is a regularized estimator $\hat\lambda_\mu$ of the rate:

\begin{equation} \hat\lambda_\mu = \frac{K+\rho(\mu-1)+1}{N+\rho}. \end{equation}

This is biased toward higher rates due to the +1 in the numerator. Using the mode $(\alpha-1)/\beta$ is another option, which lacks this bias:

\begin{equation} \hat\lambda_\text{mode} = \frac{K+\rho\mu}{N+\rho}. \end{equation}

One can interpolate between these mean-based and mode-based regularizers with another parameter $\gamma\in[0,1]$, where $\gamma=0$ corresponds to the mode-based estimator, and $\gamma=1$ to the mean-based estimator:

\begin{equation} \hat\lambda = \frac{K+\rho(\mu-\gamma)+\gamma}{N+\rho}. \end{equation}

We use $\gamma=0.5$ and $\rho=1.3$ as the default here.
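In code, this per-bin estimator is a one-liner (a sketch; here `mu` stands for the neuron's mean spikes-per-visit, e.g. `K.sum()/N.sum()`):

```python
import numpy as np

def regularized_rate(K, N, mu, rho=1.3, gamma=0.5):
    # Posterior point estimate interpolating between the gamma mean and mode.
    return (K + rho * (mu - gamma) + gamma) / (N + rho)
```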

Even with regularization, estimating the rate directly in each bin is far too noisy to be useful. Why go through all this trouble to define a principled way to regularize counts for single bins then? These regularized rate estimators provide a principled way to define how a rate estimator should behave when data are limited, and can be incorporated into better estimators that pool data from adjacent bins. Next, we explore a simple way to pool data from adjacent bins using kernel density smoothing.

Estimating rate by smoothing

Estimating rate via Kernel Density Estimation (KDE)

The simplest way to pool data is to average the spike counts over nearby regions. We'll use a Gaussian blur here. The 2D Gaussian blur is a separable filter, so we can compute it using two 1D Gaussian blurs, one in each direction. It can also be computed quickly using the Fast Fourier Transform (FFT). This amounts to Kernel Density Estimation (KDE).

In our case, we must also account for the nonuniform sampling of space. The rat visits some locations more than others. The solution is to smooth the spike counts $K$ and location visits $N$ separately, and then estimate the rate.

\begin{equation} \begin{aligned} \hat\lambda_\text{kde} &= \frac {\kappa \otimes K} {\kappa \otimes N} \\ \kappa(\mathbf x,\mathbf x') &= \exp\left[ -\tfrac {(\mathbf x-\mathbf x')^2}{2\sigma^2}\right] \end{aligned} \end{equation}

If we want to use the regularized rate estimator defined earlier, we should normalize our smoothing kernel $\kappa(\mathbf x,\mathbf x')$ to unit height. This accounts for the fact that smoothing pools multiple observations, and so increases the certainty of our rate estimate relative to the prior.

\begin{equation} \begin{aligned} \hat\lambda_\text{kde} &= \frac {\kappa \otimes K+\rho(\mu-\gamma)+\gamma} {\kappa \otimes N+\rho} \end{aligned} \end{equation}
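A sketch of this smoothed, regularized estimator, assuming the maps are already zero-padded and the kernel is stored with its peak at index (0,0) so that circular FFT convolution keeps everything aligned:

```python
import numpy as np

def unit_height_gaussian(L, sigma):
    d = np.minimum(np.arange(L), L - np.arange(L))   # circular distance from 0
    k1 = np.exp(-0.5 * (d / sigma) ** 2)
    return np.outer(k1, k1)                          # separable 2D kernel, peak = 1

def blur(x, kern):
    # circular convolution via the FFT
    return np.real(np.fft.ifft2(np.fft.fft2(x) * np.fft.fft2(kern)))

def kde_rate(K, N, sigma, mu, rho=1.3, gamma=0.5):
    kern = unit_height_gaussian(K.shape[0], sigma)
    return (blur(K, kern) + rho * (mu - gamma) + gamma) / (blur(N, kern) + rho)
```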

For analyzing the underlying grid, we might also want to remove large-scale variations in rate across the arena. We can estimate a background rate also via Gaussian smoothing, and divide out this rate to get a normalized estimate of how rate changes with location.

Inspecting the data

Kernel density smoothing yields a good estimate of the rate map, but we need to decide how much to blur the spike count data. We can pick $\sigma$ in a principled way by examining the autocorrelation of the data.

We can calculate the 2D autocorrelation efficiently using the FFT. To focus on fluctuations around the mean rate, we should first subtract any constant component.
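A sketch of this calculation using the Wiener–Khinchin theorem (assuming the rate map is an $L\times L$ array):

```python
import numpy as np

def autocorrelation_2d(rate):
    z = rate - rate.mean()                 # remove the constant component
    P = np.abs(np.fft.fft2(z)) ** 2        # power spectrum
    acorr = np.real(np.fft.ifft2(P)) / z.size
    return np.fft.fftshift(acorr)          # put zero lag at the center
```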

We can collapse this 2D autocorrelation down to 1D by averaging it as a function of radial distance. This radial autocorrelation has a large peak at zero lag, but also several smaller peaks due to the periodic tuning curve.

We can estimate the grid spacing based on the location of the first non-zero-lag peak. Here, we use sinc interpolation computed via FFT to find the location of the peak that corresponds to the grid spacing.

For grid cells, the 2D autocorrelation should show a hexagon, which reflects the three sinusoidal components that make up the periodic grid tiling (below, left).

Once we have the grid spacing $P$, we can define the scales for smoothing. We want to smooth as much as possible, but not so much that we erase the underlying grid. A Gaussian with $\sigma = P/\pi$ is a good heuristic for the largest acceptable smoothing radius. For subtracting the background, we use $\sigma_{bg} = 2.5\cdot P/\pi$.

Smoothing with Gaussian Process regression

KDE smoothing is ok, but we can do better. Gaussian Process (GP) regression provides a flexible way to handle missing data, and also lets us encode more assumptions about the spatial correlations in the underlying rate map.

Let's start by implementing smoothing using GP regression. Recall the formula for the GP posterior mean:

\begin{equation} \begin{aligned} \mu &= \left[\Sigma_0^{-1}+\Sigma_\epsilon^{-1}\right]^{-1}\left[\Sigma_\epsilon^{-1}y+\Sigma_0^{-1}\mu_0\right] \end{aligned} \end{equation}

If we set the prior means to zero, this simplifies to:

\begin{equation} \begin{aligned} \hat\lambda_\text{gp} = \left[\Sigma_0^{-1} + \Sigma_\epsilon^{-1}\right]^{-1}\Sigma_\epsilon^{-1}y \end{aligned} \end{equation}

To set up our GP regression problem, we need to define the prior covariance $\Sigma_0$ and measurement precisions $\Sigma_\epsilon^{-1}$. For now, we assume uniform measurement noise of $\sigma_\epsilon^2$ per time-point.

We work with the binned spike counts $K$, and the total number of times the rat visits each location $N$. Binning lumps the $N$ observations at a location together into a single estimate. Points with more visits have less error. Bins lacking data ($N=0$) can be removed, or handled gracefully if we work with precision rather than variance. Precision $\tau$ is the reciprocal of variance, $\tau=1/\sigma^2$ (in the multivariate case: the inverse of the covariance matrix). For $N$ measurements with noise $\sigma_\epsilon^2$, the precision is $\tau = N/\sigma_\epsilon^2$. We therefore define the precision matrix of the observations as

\begin{equation} \begin{aligned} \Sigma^{-1}_\epsilon &= \operatorname{diag}[\tau_\epsilon] \\ \tau_\epsilon &= \frac{\operatorname{vec}[N]}{\sigma_\epsilon^2} \end{aligned} \end{equation}

where $\operatorname{diag}[]$ denotes constructing a diagonal matrix from a vector, and $\operatorname{vec}[]$ denotes unravelling the $L{\times}L$ array into a length $L^2$ vector.

We construct the prior covariance matrix $\Sigma_0$ by evaluating the kernel $\kappa(\mathbf x,\mathbf x')$ for all pairs of bins. Here, we configure the kernel heuristically: we pick an arbitrary smoothing radius, and scale the kernel height to match the estimated variance of the rate map.

Here, we explore a grid size of $L=100$. For larger problems, the prior covariance might not fit in memory. As we'll see shortly, it is enough to define a routine that computes the product of the prior covariance (or its inverse) with a vector.

The numerical stability of our GP regression will be poor if our prior covariance has small eigenvalues. The eigenvalues of our covariance correspond to the coefficients of the Fourier transform of our kernel, so we can "repair" our kernel by setting too-small eigenvalues to a small positive value.

To solve our GP regression problem, we use the following form for the posterior mean. This form avoids inverting the prior covariance, so it's more numerically stable.

\begin{equation} \begin{aligned} \mu &= \left[\Sigma_0^{-1} + \Sigma_\epsilon^{-1}\right]^{-1} \Sigma_\epsilon^{-1} y \\&= \left[\Sigma_0^{-1} + \operatorname{diag}[\tau_\epsilon]\right]^{-1} \operatorname{diag}[\tau_\epsilon] y \\&= \left[\Sigma_0 \operatorname{diag}[\tau_\epsilon] + I\right]^{-1} \Sigma_0 \operatorname{diag}[\tau_\epsilon]y \end{aligned} \end{equation}

We apply a few optimizations for speed.

Finally, we use scipy.sparse.linalg.minres to solve the linear system

\begin{equation} \begin{aligned} \mu &= A^{-1}\mathbf v \\ A&=\Sigma_0 \operatorname{diag}[\tau_\epsilon] + I \\ \mathbf v&=\Sigma_0 \operatorname{diag}[\tau_\epsilon]\,y . \end{aligned} \end{equation}

Minres stands for "minimum residual", and is a type of Krylov subspace solver. It works by re-phrasing $\mu = A^{-1}\mathbf v$ as:

\begin{equation} \mu = \underset{\mu}{\operatorname{argmin}} \| A\mu - \mathbf v\|^2 \end{equation}

And then minimizing this error ("residual"). We do not need to explicitly construct $A$ (which is memory intensive), but instead supply a function that calculates $A\mu$ for any given vector $\mu$. This can be evaluated efficiently as element-wise multiplications and convolutions implemented via FFT.
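A minimal sketch of this solver. Here `kern` is assumed to hold the prior kernel as an $L\times L$ array with its peak at index (0,0), and `tau` the per-bin precisions $N/\sigma_\epsilon^2$. Note that $A$ is only symmetric when $\tau$ is constant, so if minres struggles to converge, a general Krylov solver such as scipy.sparse.linalg.lgmres is a drop-in alternative.

```python
import numpy as np
from scipy.sparse.linalg import LinearOperator, minres

def gp_posterior_mean(y, tau, kern):
    shape, L2 = y.shape, y.size
    Kf = np.fft.fft2(kern)          # eigenvalues of the circulant prior Sigma_0
    def S0(v):                      # apply Sigma_0 to a flattened vector via FFT
        return np.real(np.fft.ifft2(Kf * np.fft.fft2(v.reshape(shape)))).ravel()
    # A v = Sigma_0 diag(tau) v + v, evaluated with FFTs and element-wise products
    A = LinearOperator((L2, L2), matvec=lambda v: S0(tau.ravel() * v) + v,
                       dtype=float)
    b = S0(tau.ravel() * y.ravel())  # right-hand side Sigma_0 diag(tau) y
    mu, info = minres(A, b)
    return mu.reshape(shape)
```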

Sometimes GP regression reduces to convolution

It seems like GP regression yields similar results to kernel density estimation. Can we relate these two operations? Recall the solution for the GP posterior:

\begin{equation} \begin{aligned} \mu = \left[\Sigma_0^{-1} + \Sigma_\epsilon^{-1}\right]^{-1}\Sigma_\epsilon^{-1}y. \end{aligned} \end{equation}

The prior $\Sigma_0$ is a positive semi-definite matrix, so it can be written in terms of the eigenvalue decomposition

\begin{equation} \Sigma_0 = \text F \operatorname{diag}[\tilde k] \text F^{-1}, \end{equation}

where $\tilde k$ is a vector of eigenvalues and $\text F$ is a unitary basis. If $\Sigma_\epsilon^{-1}$ can also be diagonalized by $\text F$, i.e. $\Sigma_\epsilon^{-1} = \text F \operatorname{diag}[\tilde\tau]\, \text F^{-1}$, then the posterior mean simplifies to

\begin{equation} \begin{aligned} \mu = \operatorname F \operatorname{diag}\left[\tfrac {\tilde k \tilde \tau} {\tilde k \tilde \tau + 1}\right] \operatorname F^{-1} y. \end{aligned} \end{equation}

In the special case that all measurements have noise $\sigma_\epsilon^2$, the precision matrix $\Sigma_\epsilon^{-1} = I/\sigma_\epsilon^2$ is proportional to the identity, and the GP posterior reduces to:

\begin{equation} \begin{aligned} \mu = \text F \operatorname{diag}\left[\tfrac {\tilde k} {\tilde k + \sigma_\epsilon^2}\right] \text F^{-1} y. \end{aligned} \end{equation}

When the GP is evaluated on a regularly-spaced grid, the eigenspace $\text F$ is Fourier space, and $\text F$ is the (unitary) Fourier transform. The above matrix operations can therefore be computed as a convolution ($\otimes$) with the kernel $g(\mathbf x,\mathbf x')$:

\begin{equation} \begin{aligned} g(\mathbf x,\mathbf x') &= \text F^{-1} \operatorname{diag}\left[\tfrac {\tilde k} {{\tilde k} + \sigma_\epsilon^2}\right] \\ \mu&\approx g \otimes y. \end{aligned} \end{equation}

For large measurement error $\sigma_\epsilon^2\gg\tilde k$, the kernel $g(\mathbf x,\mathbf x')$ is approximately proportional to the prior kernel $\kappa$. When the measurement error is small $\sigma_\epsilon^2\ll\tilde k$, convolution with $g(\mathbf x,\mathbf x')$ approximates the identity transformation.

This highlights that sometimes filtering the observations with a convolution kernel gives you something almost as good as a GP regression. This is much simpler, and is often good enough.
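A sketch of this shortcut, assuming uniform measurement noise $\sigma_\epsilon^2$ and a circulant prior built from a kernel stored with its peak at index (0,0):

```python
import numpy as np

def gp_smooth_by_convolution(y, kern, sigma_eps2):
    Kf = np.real(np.fft.fft2(kern))        # prior eigenvalues k~
    gain = Kf / (Kf + sigma_eps2)          # per-frequency shrinkage k~/(k~+sigma^2)
    return np.real(np.fft.ifft2(gain * np.fft.fft2(y)))
```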

Better priors

So far, we've only used GP regression with a Gaussian prior. When analyzing data from grid cells, the real power of GP regression lies in being able to encode the knowledge that the grid should be periodic into the GP prior kernel.

To construct a periodic prior, we estimate the autocorrelation from a perfect grid. To avoid assuming any particular orientation, we make the kernel radially symmetric. To avoid inferring long-range interactions where none exist, we taper the kernel to look only at the local neighborhood.

We adapt the kernel to the observed statistics of the spike count data by scaling the zero-lag peak in the kernel to match an estimate of the variance in the rate.

The zero-lag autocorrelation of the data reflects the sum of the true variance in the underlying rates, plus the average measurement noise.

To remove the contribution from the measurement noise, we estimate the zero-lag variance by fitting a quadratic polynomial to the correlation at nearby, nonzero lags.

This prior encodes the assumption that the observed spike counts have a periodic underlying structure, and leads to better recovery of the grid fields.

Heuristic approximation of Poisson noise

Neuronal spiking is typically treated as conditionally Poisson, which means its variance should be proportional to the firing rate. Let's explore a heuristic way to incorporate a Poisson noise assumption into our GP regressions. Earlier, we discussed how the gamma distribution could serve as a conjugate prior for Poisson count data. We can also use a gamma distribution to model the measurement uncertainty from a collection of Poisson observations, and incorporate this model of uncertainty into our GP regression.

The variance of a $\operatorname{Gamma}(\alpha,\beta)$ distribution is $\sigma^2=\alpha/\beta^2$. The regularized rate estimator given $K$ spikes in $N$ visits to a given location yields a gamma distribution with $\alpha = K+\rho(\mu-\gamma)+1$ and $\beta=N+\rho$. The variance, then, is

\begin{equation} \sigma^2_\epsilon=\tfrac{\alpha}{\beta^2}=\tfrac{K+\rho(\mu-\gamma)+1}{(N+\rho)^2} \end{equation}

Performance for this model of the error is mixed: it can work better than assuming constant error when data are limited, but sometimes performs worse than simply assuming uniform variance equal to the neuron's average firing rate. We discuss a more principled way to handle Poisson noise in the next section.

Log-Gaussian Cox Processes

So far, we've been using GP regression to estimate the underlying rate. This works, but entails some heuristic decisions about how to model measurement noise. Can we do better?

We can get an even better model of the data by fitting a log-Gaussian Cox process model to the binned count observations. This places a Gaussian process prior on the logarithm of the intensity, $\ln(\lambda)$, and assumes that spike count observations are conditionally Poisson:

\begin{equation} \begin{aligned} y&\sim\operatorname{Poisson}(\lambda) \\ \lambda &= \exp(\mathbf w^\top \mathbf x) \\ \mathbf w&\sim \mathcal N(0,\Sigma_0) \end{aligned} \end{equation}

Above, $\mathbf w$ are the log-rates that we want to infer, and $\mathbf x$ is an indicator vector which is 1 for the rat's current binned location and zero otherwise.

Recall that the probability of observing spike count $y$ given rate $\lambda$, for Poisson-distributed spike counts, is:

\begin{equation} \Pr(y|\lambda) = \frac {\lambda^y e^{-\lambda}} {\Gamma(y+1)} \end{equation}

We work with log-probability for numerical stability. The log-probability of observing spike count $y$ given rate $\lambda$, for Poisson-distributed spike counts, is:

\begin{equation} \ln\Pr(y|\lambda) = y \ln \lambda -\lambda + \text{const.} \end{equation}

We estimate a posterior distribution on $\mathbf w$ by multiplying our Gaussian process prior by this Poisson likelihood, for all $T$ time points.

The maximum a posteriori estimate

We find $\mathbf w$ that maximizes the posterior probability of the observed spike counts. This is the Maximum A Posteriori (MAP) estimator. For this, we need only calculate the posterior log-probability up to a constant. By convention, we work with the negative log-posterior so that finding the MAP is a minimization problem.

The negative log-posterior $\mathcal L=-\ln\Pr(\mathbf w|\text Y,\text X)$ (up to a constant), given observations $\text Y=\{y_1,..,y_T\}$ at locations $\text X=\{\mathbf x_1,..,\mathbf x_T\}$, is:

\begin{equation} \begin{aligned} \mathcal L &= \tfrac 1 2 \mathbf w^\top \Sigma_0^{-1} \mathbf w - \textstyle\sum_{t=1}^T [y_t \ln(\lambda_t) - \lambda_t] + \text{const.} \\ \lambda_t &= \exp(\mathbf w^\top \mathbf x_t) \end{aligned} \end{equation}

We bin the data into $r\in{1..R}$ spatial regions. Each site $r$ has $n_r$ visits in which we observe $k_r$ spikes. Define $\bar y_r=k_r/n_r$ as the empirical rate in each region. We can rewrite the sum over all timepoints in the log likelihood, as a sum over all spatial regions:

\begin{equation} \begin{aligned} \textstyle\sum_{t=1}^T y_t \ln(\lambda_t) - \lambda_t &= \textstyle\sum_{r=1}^R n_r [ \bar y_r \ln(\lambda_r) - \lambda_r ] \end{aligned} \end{equation}

The negative log-posterior can then be written as:

\begin{equation} \begin{aligned} \mathcal L = \tfrac 1 2 \mathbf w^\top \Sigma_0^{-1} \mathbf w +\textstyle\sum_{r=1}^R n_r [\lambda_r - \bar y_r \ln(\lambda_r)] +\text{const.} \end{aligned} \end{equation}

Written as a sum over bins like this, with $\lambda_r = \exp(\mathbf w^\top \mathbf x_r) = \exp(\mathbf w_r)$, the negative log-posterior simplifies to:

\begin{equation} \begin{aligned} \mathcal L = \tfrac 1 2 \mathbf w^\top \Sigma_0^{-1} \mathbf w +\textstyle\sum_{r=1}^R n_r [\exp(\mathbf w_r) - \bar y_r \mathbf w_r] +\text{const.} \end{aligned} \end{equation}
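As a concrete sketch, this objective and its gradient might be implemented as follows, with everything flattened to length-$R$ vectors; `Sigma0_inv_dot` stands for any routine that applies $\Sigma_0^{-1}$ (e.g. via the FFT), and bins with $n_r=0$ are assumed to have $\bar y_r$ set to zero so that they drop out:

```python
import numpy as np

def lgcp_loss(w, n, ybar, Sigma0_inv_dot):
    # negative log-posterior, up to an additive constant
    lam = np.exp(w)
    return 0.5 * w @ Sigma0_inv_dot(w) + np.sum(n * (lam - ybar * w))

def lgcp_grad(w, n, ybar, Sigma0_inv_dot):
    # gradient of the negative log-posterior with respect to w
    lam = np.exp(w)
    return Sigma0_inv_dot(w) + n * (lam - ybar)
```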

We find the MAP by minimizing the above as a function of $\mathbf w$. This can be solved via gradient descent. However, I found that most of the optimization routines in Scipy's minimize function performed poorly, either crashing or failing to terminate. Scipy's conjugate gradient method performed the best, but achieved poor error tolerance. Instead, we can build our own Newton-Raphson solver.

Finding the maximum a posteriori using Newton-Raphson

Newton-Raphson solves a linear system on each iteration. Each iteration takes about the same amount of time as solving a single GP regression problem.

(Indeed, one can view each stage of Newton-Raphson as its own GP regression problem. This is the idea behind the Iteratively Reweighted Least Squares (IRLS) approach to fitting Generalized Linear Models (GLMs). The Gaussian process model used here can be viewed as a Poisson GLM with the GP prior acting as a regularizer. Lieven Clement has a good introduction on IRLS.)

Each iteration of Newton-Raphson updates the parameters as

\begin{equation} \mathbf w_{i+1} = \mathbf w_i - \mathbf H^{-1} \mathbf J, \end{equation}

where $\mathbf J = \nabla\mathcal L$ and $\mathbf H = \nabla\nabla^\top\mathcal L$ are the Jacobian (gradient) and Hessian (curvature) of our negative log-posterior at the current parameter estimate $\mathbf w_i$.

To apply Newton-Raphson we need to calculate the Hessian matrix and Jacobian vector. We can express these as a sum of a contribution from the log-prior and log-likelihood.

The negative log-prior is $\tfrac 1 2 \mathbf w^\top \Sigma_0^{-1} \mathbf w$ (up to a constant). We can express its contribution to the Hessian and Jacobian in terms of vector derivatives in $\mathbf w$:

\begin{equation} \begin{aligned} \mathbf J_0 &= \nabla_\mathbf w [\tfrac 1 2 \mathbf w^\top \Sigma_0^{-1} \mathbf w] \\&= \tfrac 1 2 [\Sigma_0^{-1}\mathbf w + \Sigma_0^{-\top} \mathbf w] \\&= \Sigma_0^{-1} \mathbf w \\ \mathbf H_0 &= \nabla\nabla^\top_\mathbf w [\tfrac 1 2 \mathbf w^\top \Sigma_0^{-1} \mathbf w] \\&= \Sigma_0^{-1} \end{aligned} \end{equation}

For the negative log-likelihood $\ell=\textstyle\sum_{r=1}^R n_r[\exp(\mathbf w_r)-\bar y_r\mathbf w_r]$, only the rate $\lambda_r$ contributes to the corresponding derivatives in $w_r$:

\begin{equation} \begin{aligned} \partial_{w_r}\ell &= n_r [\exp(\mathbf w_r)-\bar y_r] = n_r [\lambda_r-\bar y_r] \\ \partial^2_{w_r} \ell &= n_r \exp(\mathbf w_r) = n_r \lambda_r. \end{aligned} \end{equation}

These can be written in vector form as

\begin{equation} \begin{aligned} \mathbf J_\ell& = N\circ(\lambda-\bar y) \\ \mathbf H_\ell&=\operatorname{diag}[N\circ\lambda], \end{aligned} \end{equation}

where $\circ$ denotes element-wise multiplication and $N=\{n_1,..,n_R\}$, $\lambda=\{\lambda_1,..,\lambda_R\}$, and $\bar y=\{\bar y_1,..,\bar y_R\}$ are column vectors of the number of visits per bin, the current estimated rates, and the empirical rates, respectively.

The Jacobian and Hessian can be written as:

\begin{equation} \begin{aligned} \mathbf J &= \mathbf J_0+\mathbf J_\ell= \Sigma_0^{-1}\mathbf w+N\circ(\lambda-\bar y) \\ \mathbf H&= \mathbf H_0+\mathbf H_\ell= \Sigma_0^{-1}+\operatorname{diag}[N\circ\lambda] \end{aligned} \end{equation}

The Newton-Raphson update is then given by

\begin{equation} \begin{aligned} \mathbf w_{n+1} &= \mathbf w_{n} -\mathbf H^{-1} \mathbf J \\&= \mathbf w_{n} - \left[ \Sigma_0^{-1}+\operatorname{diag}[N\circ\lambda] \right] ^{-1} \left[ \Sigma_0^{-1}\mathbf w_{n}+ N\circ(\lambda-\bar y) \right] \end{aligned} \label{eq:lgcpnr1} \end{equation}
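For small grids, this update can be written directly with dense matrices (a sketch; for larger grids the linear solve is replaced by minres with FFT-based operators, as discussed in the notes below):

```python
import numpy as np

def lgcp_map_newton(w0, n, ybar, Sigma0_inv, iters=20):
    w = w0.copy()
    for _ in range(iters):
        lam = np.exp(w)
        J = Sigma0_inv @ w + n * (lam - ybar)     # Jacobian of the loss
        H = Sigma0_inv + np.diag(n * lam)         # Hessian of the loss
        w = w - np.linalg.solve(H, J)             # Newton-Raphson step
    return w
```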

Note: (pre) conditioning

As in GP regression, this problem can be numerically unstable if $\Sigma_0$ has small eigenvalues. One can mitigate this by multiplying both the Hessian and Jacobian on the left by $\Sigma_0$ (i.e. preconditioning).

\begin{equation} \begin{aligned} \mathbf w_{n+1} &= \mathbf w_{n} - \left[ \Sigma_0\operatorname{diag}[N\circ\lambda] + I \right] ^{-1} \left\{ \mathbf w_{n}+\Sigma_0 [N\circ(\lambda-\bar y)] \right\} \end{aligned} \end{equation}

However, when $\Sigma_0$ is invertible, it is faster in practice to use form $\eqref{eq:lgcpnr1}$, and pass an operator that computes $f(\mathbf v)=\Sigma_0\mathbf v$ to the preconditioner argument of minres.

Note: when to use a separate bias term?

Sometimes, you might want to separate out the average log-rate, and parameterize the LGCP as $\Theta=(\mathbf w,\beta)$; $\lambda=\exp(\mathbf w+\beta)$. Why? We want to avoid placing prior assumptions on the average firing rate of the neuron. The average rate therefore corresponds to a direction in our Gaussian process that is entirely unconstrained, i.e. has infinite variance in the prior.

Since GP regression is linear, it suffices to subtract the average rate from the observations before inferring the firing rate map, and add it back afterwards. The average log-rate is less straightforward to handle in LGCP regression, so it must be inferred along with the weights during optimization.

Here, we limit small eigenvalues of $\Sigma_0$, so that $\Sigma_0^{-1}$ is well-defined. We then zero-out the uniform (average) rate component of $\Sigma_0^{-1}$ by setting the DC term in its Fourier transform to zero.

However, in other applications $\Sigma_0$ might be sufficiently low-rank that computing even a regularized $\Sigma_0^{-1}$ is impractical or inaccurate. In this case, one can avoid inverting $\Sigma_0$ by treating the unconstrained mean as an additional bias parameter $\beta$ that is unaffected by the prior.

Note: Iteratively Reweighted Least-Squares (IRLS)

The Iteratively Reweighted Least-Squares (IRLS) approach recasts each Newton-Raphson iteration as solving a new GP regression problem. Rewrite the Newton-Raphson iteration as:

\begin{equation} \begin{aligned} \mathbf w_{n+1} &=\mathbf w_{n}-\mathbf H^{-1}\left[\Sigma_0^{-1}\mathbf w_{n}+N\circ(\lambda-\bar y)\right] \\&=\mathbf H^{-1}\left[\mathbf H\mathbf w_{n}-\Sigma_0^{-1}\mathbf w_{n}- N\circ(\lambda-\bar y) \right] \\&=\mathbf H^{-1}\left[\left(\Sigma_0^{-1}+\operatorname{diag}[N\circ\lambda]\right)\mathbf w_{n}-\Sigma_0^{-1}\mathbf w_{n}-N\circ(\lambda-\bar y)\right] \\&=\mathbf H^{-1}\operatorname{diag}[N\circ\lambda]\left[\mathbf w_{n}-(1-\bar y/\lambda)\right] \end{aligned} \end{equation}

Recall the formula for the GP posterior is $\mu= \Sigma \left[\Sigma_0^{-1}\mu_0 + \Sigma_\epsilon^{-1}y \right]$ and $\Sigma = \left[\Sigma_0^{-1} + \Sigma_\epsilon^{-1} \right]^{-1}$. Matching terms, we get:

\begin{equation} \begin{aligned} \Sigma_0^{-1} &= \Sigma_0^{-1} \\ \mu_0 &= 0 \\ \Sigma_\epsilon^{-1} &= \operatorname{diag}[N\circ\lambda] \\ y &= \mathbf w_n - (1-\bar y/\lambda) = \mathbf w_n + \bar y/\lambda - 1 \end{aligned} \end{equation}

This confirms that estimating the LGCP posterior has similar complexity to GP regression.

Note: initializing a prior for log-Gaussian inference

So far, we have defined a Poisson observation model and log-posterior loss function for a log-Gaussian point-process model of the grid cell. We also need to initialize a sensible prior for the weights $\mathbf w$, which will correspond to our estimates of $\ln\lambda$. We'll use the same periodic kernel from earlier, but normalize the kernel height to the variance of the log-rate, estimated via KDE.

Note: Hessian-vector products

We can gain the benefit of Krylov subspace methods in LGCP regression by defining a routine to compute the product of the Hessian with a vector. Writing $\nu = N\circ\lambda$ and letting $\tilde\kappa$ denote the Fourier-domain eigenvalues of the prior kernel:

\begin{equation} \begin{aligned} \mathbf H \mathbf v &= \Sigma_0^{-1}\mathbf v+\operatorname{diag}[\nu]\mathbf v \\ &= \text F^{-1} \left[\tfrac 1 {\tilde\kappa} \circ\text F\,\mathbf v\right] +\nu\circ\mathbf v \end{aligned} \end{equation}
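A sketch of such a routine, assuming `Kf` holds the repaired (strictly positive) Fourier eigenvalues of the prior kernel and `nu` is the vector $N\circ\lambda$ at the current Newton step:

```python
import numpy as np

def make_hessian_vector_product(Kf, nu, shape):
    def hvp(v):
        V = np.fft.fft2(v.reshape(shape))
        prior = np.real(np.fft.ifft2(V / Kf)).ravel()   # Sigma_0^{-1} v via FFT
        return prior + nu * v                           # + diag(nu) v
    return hvp
```

The returned function can be wrapped in a scipy.sparse.linalg.LinearOperator and passed to minres.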

Subtracting the background

We can modify our log-Gaussian Cox process model to subtract the background rate by adding the estimated background log-rate as an offset during inference.

Convolution approximation

We can also approximate the log-Gaussian model as a convolution. This amounts to treating only the first iteration of Newton-Raphson as if it were a GP regression problem, and replacing the per-bin measurement noise with its average. A single Newton-Raphson step for the weights alone is:

\begin{equation} \begin{aligned} \mathbf w_{i+1} &= \mathbf w_{i} - [\Sigma_0^{-1} + \Sigma_\varepsilon^{-1}]^{-1} \left[ \Sigma_0^{-1} \mathbf w_{i} + N\circ(\lambda - \bar y) \right], \end{aligned} \end{equation}

where $\Sigma_\varepsilon = \operatorname{diag}[N\circ\lambda]^{-1}$. Now, approximate $\Sigma_\varepsilon\approx\sigma^2_\epsilon I$ where $\sigma^2_\epsilon=\langle(N\circ\lambda)^{-1}\rangle$:

\begin{equation} \begin{aligned} \hat {\mathbf w} &\approx \mathbf w_0 - [\Sigma_0^{-1} + \tfrac 1 {\sigma^2_\epsilon} I]^{-1} \left[ \Sigma_0^{-1} \mathbf w_{0} + N\circ(\lambda-\bar y) \right] \\ &= \mathbf w_0 - \text F \tfrac {\sigma^2_\epsilon} {\tilde k+\sigma^2_\epsilon} \text F^{-1} \mathbf w_0 - \text F \tfrac {\sigma^2_\epsilon \tilde k} {\tilde k+\sigma^2_\epsilon} \text F^{-1} N\circ(\lambda-\bar y) \end{aligned} \end{equation}

This implies an approximate solution in terms of two convolutions:

\begin{equation} \begin{aligned} \hat {\mathbf w} \approx \mathbf w_0 &- g\otimes\{\mathbf w_0 + \kappa\otimes[N\circ(\lambda-\bar y)]\} \\ g(\mathbf x,\mathbf x') &= \text F^{-1} \left[ \tfrac {\sigma^2_\epsilon} {\tilde k+\sigma^2_\epsilon} \right] \end{aligned} \end{equation}

where $\kappa(\mathbf x,\mathbf x')$ is the GP prior and $\tilde k$ is its Fourier transform. This can be calculated almost instantaneously, and differs from the MAP in this example by only a few percent.
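A sketch of this approximation, assuming `w0` is the initial log-rate map, `kern` the prior kernel (peak at index (0,0)), and `ybar` set to zero wherever `n` is zero:

```python
import numpy as np

def lgcp_convolution_update(w0, n, ybar, kern):
    lam = np.exp(w0)
    Kf = np.real(np.fft.fft2(kern))                     # prior eigenvalues k~
    visited = n * lam > 0
    sig2 = np.mean(1.0 / (n[visited] * lam[visited]))   # sigma_eps^2 = <1/(N*lam)>
    conv = lambda F, x: np.real(np.fft.ifft2(F * np.fft.fft2(x)))
    inner = w0 + conv(Kf, n * (lam - ybar))             # w0 + kappa (*) [N(lam-ybar)]
    return w0 - conv(sig2 / (Kf + sig2), inner)         # subtract g (*) {...}
```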

Interim summary

Almost done!

Estimating confidence intervals around peaks

One question that might be nagging you at this point is: should we believe the inferred rate maps? When we see a peak (a "grid field"), is this real, or just a noisy fluctuation? For this, it is useful to generate some sort of confidence bounds or other summary of uncertainty in the inferred grid map.

First, let's find our peaks

Both GP regression and the LGCP model provide estimates of the posterior covariance, which encodes the uncertainty in our posterior mean (or mode).

For GP regression, the covariance is

\begin{equation} \begin{aligned} \Sigma_\text{post} &= \left[\Sigma_0^{-1} + \Sigma_\epsilon^{-1} \right]^{-1} \end{aligned} \end{equation}

For the LGCP model, we can use the Laplace approximation to model the uncertainty in our MAP estimate $\hat{\mathbf w}$. This models the posterior covariance as the inverse of the Hessian, evaluated at $\hat{\mathbf w}$.

Intuitively, directions with higher curvature in our loss function are more constrained, and so have lower posterior variance. Conversely, unconstrained directions have low curvature, and therefore large posterior variance.

\begin{equation} \begin{aligned} \Sigma_\text{post} &= \mathbf H_{\mathbf w}^{-1} = \left[\Sigma_0^{-1}+\operatorname{diag}[N\circ\hat\lambda]\right]^{-1} \end{aligned} \end{equation}

Incidentally, the curvature of the observation likelihood in the LGCP model, $\operatorname{diag}[N\circ\hat\lambda]$, is equivalent to the measurement precision $\Sigma_\epsilon^{-1}$ in GP regression. This highlights that the LGCP model is simply finding $\hat\lambda$ such that the observed measurement errors are consistent with a Poisson error model, $\sigma_\varepsilon^2 = \hat\lambda$.

In these notes, GP regression inferred a posterior distribution on $\lambda$, and the LGCP model inferred a posterior distribution on $\ln\lambda$. For generality, we'll refer to both of these in terms of a generic GP posterior for a function $f(\mathbf x)$, and denote by $\mathbf f$ the vector that results from evaluating $f$ over our discrete $L\times L$ grid.

Using the Laplace approximation to calculate uncertainty in peak location

The posterior covariance describes a distribution over different possible rate maps. We can denote this as the posterior mean (or mode) $\mu(\mathbf x)$, plus some fluctuations $\epsilon(\mathbf x)$:

\begin{equation} \begin{aligned} f(\mathbf x)&=\mu(\mathbf x) + \epsilon(\mathbf x) \\ \epsilon(\mathbf x)&\sim\mathcal{GP}\left[0,\Sigma_\text{post}(\mathbf x,\mathbf x')\right]. \end{aligned} \end{equation}

The fluctuations $\epsilon(\mathbf x)$ represent the uncertainty in our smoothed rate map. We are interested in how much these fluctuations might shift a local maximum in the rate map at location $\mathbf x_0$.

If $\mu(\mathbf x)$ is our inferred posterior mean (or mode), then a perturbation $\epsilon(\mathbf x)$ changes the rate map to $\mu(\mathbf x) + \epsilon(\mathbf x)$. If perturbations are small relative to the height of our peak, they will move the inferred local maximum by an amount $\Delta \mathbf x_0$.

One can calculate $\Delta \mathbf x_0$ given $\epsilon(\mathbf x)$ by considering a second-order Taylor expansion of our rate map as a function of location $\mathbf x$. The slope is zero at $\mathbf x_0$, since we are at a local maximum. The shift $\Delta \mathbf x_0$ is therefore governed by the second-order term:

\begin{equation} \Delta \mathbf x_0 = - {\mathbf H^{\mathbf x}_\mu}^{-1}\mathbf J^{\mathbf x}_\epsilon. \end{equation}

Above, $\mathbf J^{\mathbf x}_\epsilon = \nabla_{\mathbf x} \epsilon(\mathbf x_0)$ is the slope of our perturbation $\epsilon(\mathbf x)$ at $\mathbf x_0$. The larger this is, the more the peak moves. The size of the shift is also controlled by the curvature of our rate map at the peak, $\mathbf H^{\mathbf x}_\mu=\nabla_{\mathbf x}\nabla_{\mathbf x}^\top\mu(\mathbf x_0)$. More curved directions shift less, i.e. sharper peaks are more difficult to move.

We are interested in summarizing the overall uncertainty in the location of a peak. This is captured by the covariance $\Sigma_{\Delta\mathbf x_0}=\langle\Delta\mathbf x_0 (\Delta\mathbf x_0)^\top\rangle$:

\begin{equation} \begin{aligned} \Sigma_{\Delta\mathbf x_0} &=\langle\Delta\mathbf x_0 (\Delta\mathbf x_0)^\top\rangle \\&= {\mathbf H^{\mathbf x}_\mu}^{-1} \left< \mathbf J^{\mathbf x}_\epsilon {\mathbf J^{\mathbf x}_\epsilon}^\top \right> {\mathbf H^{\mathbf x}_\mu}^{-1} \\&= {\mathbf H^{\mathbf x}_\mu}^{-1} \left< \nabla_{\mathbf x} \epsilon(\mathbf x_0) \epsilon(\mathbf x_0)^\top \nabla_{\mathbf x}^\top \right> {\mathbf H^{\mathbf x}_\mu}^{-1} \\&= {\mathbf H^{\mathbf x}_\mu}^{-1} \left[ \nabla_{\mathbf x} \Sigma_\text{post}(\mathbf x_0,\mathbf x_0) \nabla_{\mathbf x}^\top \right] {\mathbf H^{\mathbf x}_\mu}^{-1} \end{aligned} \end{equation}

The term $\nabla_{\mathbf x} \Sigma(\mathbf x_0,\mathbf x_0) \nabla_{\mathbf x}^\top$ reflects the distribution of the gradient of our rate map at $\mathbf x_0$. We need to calculate $\Sigma_\text{post}$ to get this.

Building a low-rank model of the posterior variance

A $200\times200$ spatial grid has $4\times10^4$ regions, and a covariance matrix with $1.6\times10^9$ entries. This requires several gigabytes to store. Furthermore, matrix operations generally scale as $\mathcal{O}(n^3)$, where in our case $n=L\times L$. In general, then, it is infeasible to construct the posterior covariance or to perform exact calculations with it.

The solution is to approximate the posterior covariance in a low-rank subspace

$$ \begin{aligned} \Sigma_{\text{post}}\approx QQ^\top, \end{aligned} $$

where $Q$ is an $L^2\times M$ matrix, with $M\ll L^2$.

Selecting a random basis is common. In our case, we use the discrete cosine transform of the 2D spatial domain. This can be computed rapidly using the FFT. Additionally, the prior $\Sigma_0$ is diagonal in this basis, and we can construct our low-rank approximation by throwing away any components in which $\Sigma_0$ has small eigenvalues.

Armed with this low-rank approximation, we can now calculate

$$ \begin{aligned} \Sigma_{\Delta\mathbf x_0} &= {\mathbf H^{\mathbf x}_\mu}^{-1} \left[ \nabla_{\mathbf x} \Sigma_\text{post}(\mathbf x_0,\mathbf x_0) \nabla_{\mathbf x}^\top \right] {\mathbf H^{\mathbf x}_\mu}^{-1} \\&= {\mathbf H^{\mathbf x}_\mu}^{-1} \left[ \nabla_{\mathbf x}QQ^\top \nabla_{\mathbf x}^\top \right] {\mathbf H^{\mathbf x}_\mu}^{-1} \end{aligned} $$

The derivatives $\nabla_{\mathbf x}$ can then be calculated numerically using the discrete derivative. The curvature at our peak can be calculated as $\mathbf H^{\mathbf x}_\mu=\nabla_{\mathbf x}\nabla_{\mathbf x}^\top f(\mathbf x_0)\approx \mathbf D_{\mathbf x_0}\mathbf D_{\mathbf x_0}^\top \mathbf f$. We can calculate this quickly for all points at once by convolving our discrete difference operators with the data using FFT.

Once we have calculated $\Sigma_{\Delta\mathbf x_0}$ for a given peak, we can estimate an ellipsoidal confidence region for how much that peak location might shift, given our posterior uncertainty.
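A sketch of this calculation for a single peak at integer grid location `(i, j)`, given the posterior mean `mu` as an $L\times L$ array and a low-rank factor `Q` reshaped to shape (L, L, M) so that $\Sigma_\text{post}\approx QQ^\top$:

```python
import numpy as np

def peak_shift_covariance(mu, Q, i, j):
    # Curvature of the posterior mean at the peak (central differences)
    dxx = mu[i+1, j] - 2*mu[i, j] + mu[i-1, j]
    dyy = mu[i, j+1] - 2*mu[i, j] + mu[i, j-1]
    dxy = (mu[i+1, j+1] - mu[i+1, j-1] - mu[i-1, j+1] + mu[i-1, j-1]) / 4
    H = np.array([[dxx, dxy], [dxy, dyy]])
    # Covariance of the gradient of the posterior fluctuations at the peak
    G = np.stack([(Q[i+1, j] - Q[i-1, j]) / 2,
                  (Q[i, j+1] - Q[i, j-1]) / 2])      # shape (2, M)
    Hinv = np.linalg.inv(H)
    return Hinv @ (G @ G.T) @ Hinv                   # Sigma_{delta x0}
```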

Overall, this enables reasonably fast approximate confidence intervals on the peak locations.

Use sampling to assess the probability of a peak

We can also sample from the posterior distribution, and test how many of these samples have a certain property, e.g. having a peak at location $\mathbf x_0$.

In sum, we have illustrated the following workflow for analyzing firing rate maps for hippocampal grid cells:

  1. Bin spikes into a histogram of the total number of spikes and visits to each region
  2. Use autocorrelation to estimate the grid scale
  3. Use an idealized grid to set a prior for the log rate
  4. Infer log-rate using kernel density estimation (KDE)
  5. Use this KDE estimate as initializer for LGCP regression
  6. Heuristically fit a log-Gaussian Cox process model using convolution
  7. Identify grid-field centers as local maxima in the inferred rate map
  8. Use the Laplace approximation to fit confidence regions for grid field centers
  9. Sample from the GP posterior to estimate the probability of a grid field center in each region

Putting it all together

In this document, we discussed several ways to infer the underlying tuning of hippocampal grid cells. The methods outlined here will also work more generally, for any neuron tuned to a 2D feature space.

Histogram-based estimators are easy, and work if low spatial resolution is acceptable. However, they need a lot of data to return a meaningful result. It's also hard to define notions of confidence for histograms.

Kernel density estimators (KDEs) pool data from nearby regions. They are more efficient than histograms, especially if one optimizes the kernel bandwidth to match the underlying variations in neuronal tuning. For moderate amounts of data, KDE estimators are fast, and return a reasonable estimate of the rate map.

Gaussian process (GP) regression is more statistically efficient than KDE, and also estimates posterior covariance. This enables one to estimate confidence bounds for the inferred rate maps. GP regression requires solving a large linear system, but this can be accelerated using the minimum residual algorithm and calculating matrix products using the FFT. Sometimes, GP regression can be approximated by a convolution.

Log-Gaussian Cox process (LGCP) regression is a generalization of GP regression. It infers the log-firing rate under a Poisson noise assumption, and returns good estimates from limited data. The computational cost of LGCP regression is only slightly higher than GP regression.

Overall, we illustrated several approaches for applying Gaussian-process methods to hippocampal grid cell data. We covered kernel density estimation, Gaussian process regression, and log-Gaussian Cox process regression. Throughout, we discussed practical issues necessary to achieve good performance, like using the FFT when possible, choosing numerically stable forms of the equations, and fast approximations based on convolution. Ultimately, we derived an FFT-based approximation to log-Gaussian Cox process regression. This provides an approach to analyzing grid cell data that is both statistically and computationally efficient.

Appendix: numerical considerations in Gaussian Process (GP) and Log-Gaussian Cox Process (LGCP) regression

Tricks for faster linear algebra

We use several tricks to speed up large matrix calculations.

Matrix inversion and linear system solving using Cholesky factorization

If a covariance matrix is non-singular (has no zero eigenvalues), then it is positive definite. This means it has a Cholesky factorization. The default behavior of scipy.linalg.cholesky is to return an upper-triangular matrix $Q$ for which:

\begin{equation} \begin{aligned} Q &= \operatorname{chol}[\Sigma] \\ \Sigma &= Q^\top Q \end{aligned} \end{equation}

The routine scipy.linalg.lapack.dtrtri will invert an upper-triangular matrix quickly. We can leverage this to invert the matrix $\Sigma$ efficiently.

\begin{equation} \begin{aligned} \Sigma^{-1} = (Q^{\top}Q)^{-1} = Q^{-1} Q^{-\top} \end{aligned} \end{equation}

We can also use the Cholesky factorization to quickly compute $\Sigma^{-1}\mathbf u$:

\begin{equation} \begin{aligned} \Sigma^{-1}\mathbf u &= Q^{-1} Q^{-\top} \mathbf u \end{aligned} \end{equation}

The routine scipy.linalg.solve_triangular solves triangular systems quickly. Passing trans='T' solves $Q^\top \mathbf v = \mathbf u$, i.e. computes $\mathbf v = Q^{-\top}\mathbf u$; a second call without trans then gives $Q^{-1}\mathbf v = \Sigma^{-1}\mathbf u$.
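A sketch of both operations (assuming $\Sigma$ is positive definite):

```python
import numpy as np
import scipy.linalg

def cholesky_inverse(Sigma):
    Q = scipy.linalg.cholesky(Sigma)             # upper triangular, Sigma = Q^T Q
    Qinv, info = scipy.linalg.lapack.dtrtri(Q)   # fast triangular inverse
    return Qinv @ Qinv.T                         # Sigma^{-1} = Q^{-1} Q^{-T}

def cholesky_solve(Sigma, u):
    Q = scipy.linalg.cholesky(Sigma)
    v = scipy.linalg.solve_triangular(Q, u, trans='T')   # v = Q^{-T} u
    return scipy.linalg.solve_triangular(Q, v)           # Q^{-1} v = Sigma^{-1} u
```

scipy.linalg.cho_factor and cho_solve package the same pattern.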

Cholesky decomposition and inversion of a triangular matrix both have $\mathcal O(n^3)$ complexity, similar to standard matrix multiplication and inversion, but with better constant factors. Once the Cholesky factor is computed, solving a linear system has complexity $\mathcal O(n^2)$. While this does not change the asymptotic complexity, constant factors are important. An $L\times L$ grid has $L^2$ regions, and a covariance matrix over $L^2$ elements has $L^4$ entries. A matrix operation that has $n^3$ complexity therefore costs $\mathcal O(L^6)$ in terms of the linear dimension of our 2D grid.

Multiplication using the Fast Fourier Transform (FFT)

Typically, our prior covariance kernel $\kappa(\mathbf x,\mathbf x')$ is translationally invariant. When evaluated on a periodic grid, the resulting prior $\Sigma_0$ is circulant. For circulant matrices, matrix inversion and multiplication can be done in $n\,\lg(n)$ time using the FFT, where $n$ is the number of points in our spatial grid. Denote the forward Fourier transform as the operator $\text F$, and the inverse Fourier transform as $\text F^{-1}$. Define the Fourier transform of our prior kernel as

\begin{equation} \begin{aligned} \tilde \kappa(\omega) &= \text F\, \kappa(\Delta \mathbf x). \end{aligned} \end{equation}

The Fourier transform coefficients $\tilde \kappa$ are the eigenvalues of $\Sigma_0$. If we choose the unitary definition of the Fourier transform, $\text F^{-1}=\text F^\dagger$, then $\text F$ is the eigenbasis of $\Sigma_0$:

\begin{equation} \begin{aligned} \Sigma_0 &= \text F\,\operatorname{diag}[\tilde\kappa]\,\text F^\dagger. \end{aligned} \end{equation}

The inverse $\Sigma_0^{-1}$ can therefore be calculated as

\begin{equation} \begin{aligned} \Sigma_0^{-1} &= \text F\, \operatorname{diag}\left[1/{\tilde\kappa}\right]\,\text F^\dagger. \end{aligned} \end{equation}

The product $\Sigma_0 \mathbf u$ can be calculated via the FFT using the convolution theorem:

\begin{equation} \begin{aligned} \Sigma_0 \mathbf u &= \kappa \otimes \mathbf u = \text F^\dagger\left[\tilde \kappa\circ{\operatorname F\mathbf u}\right], \end{aligned} \end{equation}

where $\circ$ is element-wise multiplication. Likewise, if all $\tilde\kappa$ are greater than zero, then the linear system $\Sigma_0^{-1}\mathbf u$ can be solved as

\begin{equation} \begin{aligned} \Sigma_0^{-1} \mathbf u &= \text F^\dagger\left[\tfrac{1}{\tilde \kappa}\circ{\operatorname F\mathbf u}\right]. \end{aligned} \end{equation}
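A sketch of both operations for a circulant prior stored as an $L\times L$ kernel with its peak at index (0,0):

```python
import numpy as np

def circulant_multiply(kern, u):
    # Sigma_0 u = kappa (*) u, computed with the convolution theorem
    return np.real(np.fft.ifft2(np.fft.fft2(kern) * np.fft.fft2(u)))

def circulant_solve(kern, u, min_eig=1e-6):
    # Sigma_0^{-1} u, after clipping tiny eigenvalues for numerical stability
    Kf = np.maximum(np.real(np.fft.fft2(kern)), min_eig)
    return np.real(np.fft.ifft2(np.fft.fft2(u) / Kf))
```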

Multiplication by a diagonal matrix is element-wise multiplication

If $D=\operatorname{diag}[\mathbf d]$ is a diagonal matrix, multiplying $D$ by a vector $\mathbf u$ is simply an element-wise product:

\begin{equation} \begin{aligned} D\mathbf u &= \mathbf d\circ\mathbf u, \end{aligned} \end{equation}

And the matrix product $AD$ can be computed as a column-wise product

\begin{equation} \begin{aligned} D\mathbf u &= \mathbf d\circ\mathbf u \\ A D &= A \circ ({\tt1}\,\mathbf d^\top), \end{aligned} \end{equation}

where $\tt1$ is a column vector of ones.
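A two-line numerical check of these identities:

```python
import numpy as np

d, u, A = np.random.rand(4), np.random.rand(4), np.random.rand(4, 4)
assert np.allclose(np.diag(d) @ u, d * u)            # D u = d o u
assert np.allclose(A @ np.diag(d), A * d[None, :])   # A D scales columns of A by d
```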

Four ways to do Gaussian process regression

Form (a)

In most textbooks or tutorials you'll see the posterior mean written as:

\begin{equation} \begin{aligned} y_2 &\sim \mathcal N(\mu,\Sigma) \\ \mu &= \mu_2 + \Sigma_{12}^\top[\Sigma_{11}+\Sigma_{\epsilon}]^{-1} (y_1 - \mu_1) \\ \Sigma &= \Sigma_{22} - \Sigma_{12}^\top [\Sigma_{11}+\Sigma_{\epsilon}]^{-1} \Sigma_{12}, \end{aligned} \end{equation}

where $\Sigma_{11}$ and $\Sigma_{22}$ are the prior covariance of the observation and output points, respectively; $\Sigma_{12}$ is the cross-covariance of the observation and output points; $\Sigma_{\epsilon}$ is the measurement noise covariance; $\mu_2$ and $\mu_1$ are the prior mean at the observation and output points, respectively, and $y_1$ are the observations.

When observations are sparse, this form is computationally efficient, because $\Sigma_{11}$ and $\Sigma_{\epsilon}$ are small, and the expression $\Sigma_{12}^\top [\Sigma_{11}+\Sigma_{\epsilon}]^{-1} \Sigma_{12}$ is low-rank. This form is especially convenient when updating a GP model with a single observation, in which case it reduces to a rank-1 update:

\begin{equation} \begin{aligned} \mu &= \mu_2 + \tfrac{1}{\sigma^2_{11}+\sigma^2_{\epsilon}}\Sigma_{12}^\top (y_1 - \mu_1) \\ \Sigma &= \Sigma_{22} - \tfrac{1}{\sigma^2_{11}+\sigma^2_{\epsilon}} \Sigma_{12}^\top \Sigma_{12}. \end{aligned} \end{equation}

However, in our application we have an extended time-series where a rat visits each location many times. There are many more observations than output points, and $\Sigma_{11}$ and $\Sigma_{\epsilon}$ are large matrices. Binning together nearby measurements reduces the complexity. However, coverage of the space is quite dense. $\Sigma_{11}$ and $\Sigma_{\epsilon}$ are almost as large as $\Sigma_{22}$, even excluding bins with no observations. We therefore assume that the observations and output points are evaluated on a common grid, in which case the equations take the form:

\begin{equation} \begin{aligned} \mu &= \mu_0 + \Sigma_0^\top[\Sigma_0+\Sigma_{\epsilon}]^{-1} (y - \mu_0) \\ \Sigma &= \Sigma_0 - \Sigma_0^\top [\Sigma_0+\Sigma_{\epsilon}]^{-1} \Sigma_0, \end{aligned} \tag{a} \end{equation}

where $\Sigma_0$ and $\mu_0$ are the prior covariance and mean, $\Sigma_\epsilon$ is the measurement error covariance, and $y$ are the measurements.

This form has the following useful properties:

Problem: This form is unsuitable if $\Sigma_\epsilon^{-1}$ is singular. In order to leverage the FFT for matrix calculations, we need to perform the GP regression over a regular and periodic grid. To keep opposite ends of the space from interacting, we need to add as many zeros to the edge of our data as our kernel is wide. This means that, inherently, some bins will be missing data. Missing observations can be represented as zero precision (inverse variance), so $\Sigma_\epsilon^{-1}$ can be constructed, but it will not be invertible, so $\Sigma_\epsilon$ does not exist.

Form (b)

It's also common to encounter the following form, when deriving the posterior mean for the product of two multivariate Gaussian distributions:

\begin{equation} \begin{aligned} \mu &= \mu_0 + \left[\Sigma_0^{-1}+\Sigma_\epsilon^{-1}\right]^{-1} \Sigma_\epsilon^{-1} (y-\mu_0). \end{aligned} \tag{b} \end{equation}

This form has the following useful properties

Problem: This form is unsuitable if $\Sigma_0$ is singular, and is numerically unstable if $\Sigma_0$ is ill-conditioned. So, it requires regularization to ensure that no eigenvalue of $\Sigma_0$ is too small. For larger problems, it may require so much regularization that the prior $\Sigma_0$ is altered substantially, affecting the accuracy of the inferred posterior mean. Additionally, computing $\Sigma_0^{-1}$ via FFT requires adding zero padding to handle the circular boundary conditions correctly. This padding cannot be removed before solving the subsequent linear system without introducing boundary artifacts, so this can lead to a slightly larger linear system to solve. Since the time complexity of the solve is $\mathcal O(L^6)$ in the linear dimension $L$ of the 2D grid, this can lead to a significant slowdown.

Form (c)

We can also pull $\Sigma_0$ out of form (b), and solve for the posterior mean as:

\begin{equation} \begin{aligned} \mu &=\mu_0 + \left[\Sigma_0\Sigma_\epsilon^{-1} + I\right]^{-1} \left[ \Sigma_0\Sigma_\epsilon^{-1} (y-\mu_0) \right]. \\ \end{aligned} \tag{c} \end{equation}

This form has the following useful properties:

Problem: The matrix $\Sigma_0\Sigma_\epsilon^{-1} + I$ will not be symmetric in general, so we cannot use Cholesky factorization to solve the linear system here. This can lead to significant slow-downs for larger systems. However! Once $\Sigma_0\Sigma_\epsilon^{-1} y$ is computed via FFT, one can remove the zero-padding before solving the linear system, which reduces the problem size. Since computing circulant matrix operations using FFT requires as much zero-padding as our prior kernel is wide, this reduction in problem size can be significant. Since solving the linear system scales as $\mathcal O(L^6)$ for an $L\times L$ spatial grid, a small reduction in $L$ is much more important than the constant-factor improvement provided by Cholesky decomposition.

Additionally, for translationally-invariant kernels on a regular grid, the matrix-vector product $[\Sigma_0\Sigma_\epsilon^{-1} + I]\mathbf u$ can be calculated quickly via FFT. This can be used with a Krylov-subspace solver like minres, and can be orders of magnitude faster than other approaches.

Form (d)

In the special case that the measurement error covariance is constant, $\Sigma_\epsilon = \sigma^2_\epsilon I$, then GP regression reduces to a convolution, and can be evaluated using the Fourier transform thanks to the convolution theorem:

\begin{equation} \begin{aligned} \mu &= \mu_0 + \text F^{-1}\,\left[ \left(\frac{\tilde\kappa}{\tilde\kappa+\sigma^2_\epsilon}\right)\circ\text F\,(y-\mu_0)\right], \end{aligned} \tag{d} \end{equation}

where $\text F$ is the Fourier transform.

Problem: The regression must be on a periodic domain in order to compute $\text F$ quickly using the FFT. This can cause measurements at opposite sides of the spatial grid to influence each other. This can be solved by zero-padding. However, this form assumes the same $\sigma^2_\epsilon$ for all observations, so the zero padding will also be treated as a $y=0$ observation with error $\sigma^2_\epsilon$. This will lead to some tapering toward zero at the boundaries. Alternatively, one may use reflected boundary conditions, copying a mirrored version of $y$ into the padded space. This reflected boundary condition reduces artifacts, but is not identical to solving the original, un-padded GP regression problem.

Note: if $\Sigma_\epsilon$ is not constant, but changes slowly relative to the scale of the prior kernel, then one may also decompose a larger problem into smaller ones that tile the space.

When to use which?

Use form (a) when

Use form (b) when

Use form (c) when

Use form (d) when