bootstrap_analysis: better noise estimation and add option to speci…

…fy noise levels (#334) * bootstrap_analysis: allow specifying noise levels - otherwise estimate noise level from experimental dataset not from the fit residuals * refactor bootstrap_analysis for more general and faster tests
JeschkeLab · Jun 9, 2022 · 789aa58 · 789aa58
1 parent 8b4e91a
commit 789aa58
Show file tree

Hide file tree

Showing 2 changed files with 68 additions and 122 deletions.
diff --git a/deerlab/bootstrap_analysis.py b/deerlab/bootstrap_analysis.py
@@ -9,8 +9,9 @@
 from joblib import Parallel, delayed
 from deerlab.classes import UQResult
 from deerlab.utils import isnumeric
+from deerlab.noiselevel import noiselevel
 
-def bootstrap_analysis(fcn,Vexp,Vfit, samples=1000, resampling='gaussian', verbose = False, cores=1, memorylimit=8):
+def bootstrap_analysis(fcn,Vexp,Vfit, samples=1000, noiselvl=None, resampling='gaussian', verbose = False, cores=1, memorylimit=8):
     r""" 
     Bootstrap analysis for uncertainty quantification
 
@@ -33,6 +34,11 @@ def bootstrap_analysis(fcn,Vexp,Vfit, samples=1000, resampling='gaussian', verbo
         results improve with the number of boostrap samples evaluated, the 
         default is 1000.
 
+    noiselvl : scalar or list thereof
+        Noise level of the input dataset(s), specified as standard deviations.
+        If not specified, these are automatically estimated from the experimental
+        dataset(s). 
+
     resampling : string, optional
         Specifies the method employed for re-sampling new bootstrap samples.
 
@@ -94,10 +100,11 @@ def myfcn(V):
         raise KeyError('The 1st argument must be a callable function accepting dataset(s) as input.')
 
     # Get residuals and estimate standard deviation
-    residuals,sigma = ([],[])
     for i in range(nSignals):
-        residuals.append(Vfit[i] - Vexp[i])
-        sigma.append(np.std(residuals[i]))
+        residuals = [Vfit[i] - Vexp[i] for i  in range(nSignals)]
+    if noiselvl is None:
+        noiselvl = [noiselevel(Vexp[i]) for i in range(nSignals)]
+    noiselvl = np.atleast_1d(noiselvl)
 
     # Prepare bootstrap sampler (reduced I/O-stream when parallelized) 
     def sample():
@@ -106,7 +113,7 @@ def sample():
             #Determine requested re-sampling method
             if resampling == 'gaussian':
                 # Resample from a Gaussian distribution with variance estimated from the residuals
-                Vsample[i] = Vfit[i] + np.random.normal(0, sigma[i], len(Vfit[i]))
+                Vsample[i] = Vfit[i] + np.random.normal(0, noiselvl[i], len(Vfit[i]))
             elif resampling == 'residual':
                 # Resample from the residual directly
                 Vsample[i] =  Vfit[i] + residuals[i][np.random.permutation(len(Vfit[i]))]

diff --git a/test/test_bootstrap_analysis.py b/test/test_bootstrap_analysis.py
@@ -4,55 +4,51 @@
 from deerlab.dd_models import dd_gauss
 from deerlab.utils import assert_docstring
 
+x = np.linspace(0,5,200)
+sigma = 0.01
+phaseshift = np.exp(1j*np.pi/2)
+model = lambda par: par[0] + par[1]*x
+model_global = lambda par: [model([par[0],par[1]]), model([par[2],par[3]])]
+model_complex = lambda par: model(par)*phaseshift
+yexp = model([1,3]) + whitegaussnoise(x,sigma)
+yglobal1 = model([1,3]) + whitegaussnoise(x,sigma)
+yglobal2 = model([2,4]) + whitegaussnoise(x,sigma)
+yexp_complex = model_complex([1,3]) + whitegaussnoise(x,sigma)
+
+def fitfcn(yexp):
+    fit = snlls(yexp,model,[1,3],uq=False)
+    return fit.nonlin*fit.lin
+
+def fitfcn_global(ys):
+    fit = snlls(ys,model_global,[1,3,2,4],uq=False)
+    return fit.nonlin*fit.lin
+
+def fitfcn_multiout(yexp):
+    fit = snlls(yexp,model,[1,3],uq=False)
+    return fit.nonlin*fit.lin, fit.model 
+
+def fitfcn_complex(yexp):
+    fit = snlls(yexp,model,[1,3],uq=False)
+    return fit.nonlin*fit.lin
+
+
 def test_basics():
 # ======================================================================
     "Check the basic functionality of the bootstrapping"
 
-    t = np.linspace(0,5,200)
-    r = np.linspace(2,6,300)
-    P = dd_gauss(r,4, 0.8)
-    K = dipolarkernel(t,r)
-    Vexp = K@P + whitegaussnoise(t,0.01,seed=1)
-
-    par0 = [3, 0.5]
-    Vmodel = lambda par: K@dd_gauss(r,*par)
-    fit = snlls(Vexp,Vmodel,par0)
-    Vfit = fit.model
-
+    parfit = fitfcn(yexp)
+    paruq = bootstrap_analysis(fitfcn,yexp,model(parfit),10)
 
-    def bootfcn(V):
-        fit = snlls(V,Vmodel,par0)
-        return fit.nonlin
-
-    paruq = bootstrap_analysis(bootfcn,Vexp,Vfit,10)
-
-    assert all(abs(paruq.mean - fit.nonlin) < 1.5e-2)
+    assert all(abs(paruq.mean - parfit) < 1.5e-2)
 # ======================================================================
 
-
 def test_resampling():
 # ======================================================================
     "Check that both bootstrap resampling method yield similar results"
 
-    t = np.linspace(0,5,200)
-    r = np.linspace(2,6,300)
-    P = dd_gauss(r,4, 0.8)
-    K = dipolarkernel(t,r)
-    Vexp = K@P + whitegaussnoise(t,0.01,seed=1)
-
-    par0 = [3, 0.5]
-    Vmodel = lambda par: K@dd_gauss(r,*par)
-    fit = snlls(Vexp,Vmodel,par0)
-    Vfit = fit.model
-
-
-    def bootfcn(V):
-        fit = snlls(V,Vmodel,par0)
-        return fit.nonlin
-
-    paruq1 = bootstrap_analysis(bootfcn,Vexp,Vfit,5,resampling='residual')
-    paruq2 = bootstrap_analysis(bootfcn,Vexp,Vfit,5,resampling='gaussian')
-
+    parfit = fitfcn(yexp)
+    paruq1 = bootstrap_analysis(fitfcn,yexp,model(parfit),10,resampling='residual')
+    paruq2 = bootstrap_analysis(fitfcn,yexp,model(parfit),10,resampling='gaussian')
 
     assert all(abs(paruq1.mean - paruq2.mean) < 1.6e-2)
 # ======================================================================
@@ -62,83 +58,30 @@ def test_multiple_ouputs():
 # ======================================================================
     "Check that both bootstrap handles the correct number outputs"
 
-    t = np.linspace(0,5,200)
-    r = np.linspace(2,6,300)
-    P = dd_gauss(r,4, 0.8)
-    K = dipolarkernel(t,r)
-    Vexp = K@P + whitegaussnoise(t,0.01,seed=1)
-
-    par0 = [3, 0.5]
-    Vmodel = lambda par: K@dd_gauss(r,*par)
-    fit = snlls(Vexp,Vmodel,par0)
-    Vfit = fit.model
-
-
-    def bootfcn(V):
-        fit = snlls(V,Vmodel,par0)
-        Pfit = dd_gauss(r,*fit.nonlin)
-        return fit.nonlin, Pfit
-
-    paruq1 = bootstrap_analysis(bootfcn,Vexp,Vfit,4)
+    parfit,yfit = fitfcn_multiout(yexp)
+    paruq = bootstrap_analysis(fitfcn_multiout,yexp,model(parfit),10)
 
-
-    assert len(paruq1)==2
+    assert len(paruq)==2 and all(abs(paruq[0].mean - parfit)) and all(abs(paruq[1].mean - yfit))
 # ======================================================================
 
 def test_multiple_datasets():
 # ======================================================================
     "Check bootstrapping when using multiple input datasets"
 
-    t1 = np.linspace(0,5,200)
-    t2 = np.linspace(-0.5,3,300)
-    r = np.linspace(2,6,300)
-    P = dd_gauss(r,4, 0.8)
-    K1 = dipolarkernel(t1,r)
-    K2 = dipolarkernel(t2,r)
-
-    Vexp1 = K1@P + whitegaussnoise(t1,0.01,seed=1)
-    Vexp2 = K2@P + whitegaussnoise(t2,0.02,seed=2)
-
-    def Vmodel(par):
-        V1 = K1@dd_gauss(r,*par)
-        V2 = K2@dd_gauss(r,*par)
-        return [V1,V2]
-
-    par0 = [3, 0.5]
-    fit = snlls([Vexp1,Vexp2],Vmodel,par0)
-    Vfit1,Vfit2 = fit.model
+    parfit = fitfcn_global([yglobal1,yglobal2])
+    paruq = bootstrap_analysis(fitfcn_global,[yglobal1,yglobal2],[model([1,3]),model([2,4])],5)
 
-    def bootfcn(V):
-        fit = snlls(V,Vmodel,par0)
-        return fit.nonlin
-
-    paruq = bootstrap_analysis(bootfcn,[Vexp1,Vexp2],[Vfit1,Vfit2],5)
-
-    assert all(abs(paruq.mean - fit.nonlin) < 1.5e-2)
+    assert all(abs(paruq.mean - parfit) < 1.5e-2)
 # ======================================================================
 
 def test_parallelization():
 # ======================================================================
     "Check that bootstrap_analysis can run with multiple cores"
 
-    t = np.linspace(0,5,200)
-    r = np.linspace(2,6,300)
-    P = dd_gauss(r,4, 0.8)
-    K = dipolarkernel(t,r)
-    Vexp = K@P + whitegaussnoise(t,0.01)
-
-    par0 = [3, 0.5]
-    Vmodel = lambda par: K@dd_gauss(r,*par)
-    fit = snlls(Vexp,Vmodel,par0)
-    Vfit = fit.model
-
-    def bootfcn(V):
-        fit = snlls(V,Vmodel,par0)
-        return fit.nonlin
+    parfit = fitfcn(yexp)
+    paruq = bootstrap_analysis(fitfcn,yexp,model(parfit),10,cores=-1)
 
-    paruq = bootstrap_analysis(bootfcn,Vexp,Vfit,10,cores=-1)
-
-    assert all(abs(paruq.mean - fit.nonlin) < 1.5e-2)
+    assert all(abs(paruq.mean - parfit) < 1.5e-2)
 # ======================================================================
 
 # ======================================================================
@@ -151,24 +94,10 @@ def test_complex_values():
 # ======================================================================
     "Check the functionality of the bootstrapping with complex-valued outputs"
 
-    t = np.linspace(0,5,200)
-    r = np.linspace(2,6,300)
-    P = dd_gauss(r,4, 0.8)
-    K = dipolarkernel(t,r)
-    Vexp = K@P + whitegaussnoise(t,0.01,seed=1) 
-    Vexp = Vexp + 1j*whitegaussnoise(t,0.01,seed=1) 
-    par0 = [3, 0.5]
-    Vmodel = lambda par: K@dd_gauss(r,*par) + 1j*np.zeros_like(t) 
-    fit = snlls(Vexp,Vmodel,par0)
-    Vfit = fit.model
-
-    def bootfcn(V):
-        fit = snlls(V,Vmodel,par0)
-        return fit.nonlin
+    parfit = fitfcn_complex(yexp_complex)
+    paruq = bootstrap_analysis(fitfcn_complex,yexp_complex,model_complex(parfit),3)
 
-    paruq = bootstrap_analysis(bootfcn,Vexp,Vfit,3)
-
-    assert all(abs(paruq.mean - fit.nonlin) < 1.5e-2)
+    assert all(abs(paruq.mean - parfit) < 1.5e-2)
 # ======================================================================
 
 import pytest
@@ -183,3 +112,13 @@ def bootfcn(y):
     with pytest.raises(MemoryError):
         paruq = bootstrap_analysis(bootfcn,yexp,yfit,1e6)
 # ======================================================================
+
+def test_noiselvl():
+# ======================================================================
+    "Check that noise levels can be specified"
+
+    parfit = fitfcn(yexp)
+    paruq = bootstrap_analysis(fitfcn,yexp,model(parfit),10, noiselvl=sigma)
+
+    assert all(abs(paruq.mean - parfit) < 1.5e-2)
+# ======================================================================