From dc17ebc55a29c17a5740ca3c8529b2a3f1c14b27 Mon Sep 17 00:00:00 2001 From: Qianqian Fang Date: Tue, 10 Nov 2015 14:11:39 -0500 Subject: [PATCH] use Kahan sum for total weight accummulation, otherwise counter stops at 16777216 --- mmc/trunk/mmclab/example/demo_compare_mmc_mcx.m | 4 ++-- mmc/trunk/src/mmclab.cpp | 10 +++++----- mmc/trunk/src/tetray.c | 8 ++++---- mmc/trunk/src/tettracing.c | 9 ++++++++- mmc/trunk/src/tettracing.h | 3 ++- 5 files changed, 21 insertions(+), 13 deletions(-) diff --git a/mmc/trunk/mmclab/example/demo_compare_mmc_mcx.m b/mmc/trunk/mmclab/example/demo_compare_mmc_mcx.m index 8dc6442e..8540884a 100644 --- a/mmc/trunk/mmclab/example/demo_compare_mmc_mcx.m +++ b/mmc/trunk/mmclab/example/demo_compare_mmc_mcx.m @@ -50,7 +50,7 @@ cfg.tend=5e-9; cfg.tstep=1e-10; cfg.debuglevel='TP'; -cfg.isreflect=0; +cfg.isreflect=1; cfg.detpos=[0 30 30 2]; % define wide-field planar source @@ -88,7 +88,7 @@ %%----------------------------------------------------------------- cfgx=cfg; -cfgx=rmfield(cfgx,{'node','elem','elemprop','debuglevel'}); +cfgx=rmfield(cfgx,{'node','elem','elemprop','debuglevel','facenb','evol','e0','isreoriented'}); dim=60; cfgx.vol=ones(dim,dim,dim); cfgx.vol=uint8(cfgx.vol); diff --git a/mmc/trunk/src/mmclab.cpp b/mmc/trunk/src/mmclab.cpp index 786d06a7..b38cbf8b 100644 --- a/mmc/trunk/src/mmclab.cpp +++ b/mmc/trunk/src/mmclab.cpp @@ -48,7 +48,7 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]){ mcconfig cfg; tetmesh mesh; raytracer tracer={NULL,0,NULL,NULL,NULL}; - visitor master={0.f,0.f,0.f,0,0,0,NULL,NULL}; + visitor master={0.f,0.f,0.f,0,0,0,NULL,NULL,0.f,0.f}; RandType ran0[RAND_BUF_LEN] __attribute__ ((aligned(16))); RandType ran1[RAND_BUF_LEN] __attribute__ ((aligned(16))); unsigned int i; @@ -126,7 +126,7 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]){ #pragma omp parallel private(ran0,ran1,threadid) { - visitor visit={0.f,0.f,1.f/cfg.tstep,DET_PHOTON_BUF,0,0,NULL,NULL}; + visitor visit={0.f,0.f,1.f/cfg.tstep,DET_PHOTON_BUF,0,0,NULL,NULL,0.f,0.f}; visit.reclen=(1+((cfg.ismomentum)>0))*mesh.prop+(cfg.issaveexit>0)*6+3; if(cfg.issavedet){ if(cfg.issaveseed) @@ -176,7 +176,7 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]){ } #pragma omp atomic - master.accumu_weight += visit.accumu_weight; + master.totalweight += visit.totalweight; if(cfg.issavedet){ #pragma omp atomic @@ -231,9 +231,9 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]){ MMCDEBUG(&cfg,dlTime,(cfg.flog,"speed ...\t%.0f ray-tetrahedron tests (%.0f were overhead)\n",raytri,raytri0)); tracer_clear(&tracer); - if(cfg.isnormalized && master.accumu_weight){ + if(cfg.isnormalized && master.totalweight){ printf("total simulated energy: %.0f\tabsorbed: %5.5f%%\tnormalizor=%g\n", - master.accumu_weight,100.f*Eabsorb/master.accumu_weight,mesh_normalize(&mesh,&cfg,Eabsorb,master.accumu_weight)); + master.totalweight,100.f*Eabsorb/master.totalweight,mesh_normalize(&mesh,&cfg,Eabsorb,master.totalweight)); } MMCDEBUG(&cfg,dlTime,(cfg.flog,"\tdone\t%d\n",GetTimeMillis()-t0)); diff --git a/mmc/trunk/src/tetray.c b/mmc/trunk/src/tetray.c index dc227c29..cc7a9fc8 100644 --- a/mmc/trunk/src/tetray.c +++ b/mmc/trunk/src/tetray.c @@ -44,7 +44,7 @@ int main(int argc, char**argv){ mcconfig cfg; tetmesh mesh; raytracer tracer; - visitor master={0.f,0.f,0.f,0,0,0,NULL,NULL,0.f}; + visitor master={0.f,0.f,0.f,0,0,0,NULL,NULL,0.f,0.f}; double Eabsorb=0.0; RandType ran0[RAND_BUF_LEN] __attribute__ ((aligned(16))); RandType ran1[RAND_BUF_LEN] __attribute__ ((aligned(16))); @@ -86,7 +86,7 @@ int main(int argc, char**argv){ #pragma omp parallel private(ran0,ran1,threadid) { - visitor visit={0.f,0.f,1.f/cfg.tstep,DET_PHOTON_BUF,0,0,NULL,NULL,0.f}; + visitor visit={0.f,0.f,1.f/cfg.tstep,DET_PHOTON_BUF,0,0,NULL,NULL,0.f,0.f}; visit.reclen=(1+((cfg.ismomentum)>0))*mesh.prop+(cfg.issaveexit>0)*6+3; if(cfg.issavedet){ if(cfg.issaveseed) @@ -120,7 +120,7 @@ int main(int argc, char**argv){ } #pragma omp atomic - master.accumu_weight += visit.accumu_weight; + master.totalweight += visit.totalweight; if(cfg.issavedet){ #pragma omp atomic @@ -162,7 +162,7 @@ int main(int argc, char**argv){ if(cfg.isnormalized){ fprintf(cfg.flog,"total simulated energy: %f\tabsorbed: %5.5f%%\tnormalizor=%g\n", - master.accumu_weight,100.f*Eabsorb/master.accumu_weight,mesh_normalize(&mesh,&cfg,Eabsorb,master.accumu_weight)); + master.totalweight,100.f*Eabsorb/master.totalweight,mesh_normalize(&mesh,&cfg,Eabsorb,master.totalweight)); } if(cfg.issave2pt){ switch(cfg.outputtype){ diff --git a/mmc/trunk/src/tettracing.c b/mmc/trunk/src/tettracing.c index bc4f4ea7..277d798a 100644 --- a/mmc/trunk/src/tettracing.c +++ b/mmc/trunk/src/tettracing.c @@ -724,6 +724,7 @@ float onephoton(unsigned int id,raytracer *tracer,tetmesh *mesh,mcconfig *cfg, int oldeid,fixcount=0,exitdet=0; int *enb; float mom; + float kahany, kahant; ray r={cfg->srcpos,{cfg->srcdir.x,cfg->srcdir.y,cfg->srcdir.z},{MMC_UNDEFINED,0.f,0.f},cfg->bary0,cfg->dim.x,cfg->dim.y-1,0,0,1.f,0.f,0.f,0.f,0.,0,NULL,NULL,cfg->srcdir.w}; float (*engines[4])(ray *r, raytracer *tracer, mcconfig *cfg, visitor *visit)= @@ -746,7 +747,13 @@ float onephoton(unsigned int id,raytracer *tracer,tetmesh *mesh,mcconfig *cfg, /*initialize the photon parameters*/ launchphoton(cfg, &r, mesh, ran, ran0); r.partialpath[visit->reclen-2] = r.weight; - visit->accumu_weight += r.weight; + + /*use Kahan summation to accumulate weight, otherwise, counter stops at 16777216*/ + /*http://stackoverflow.com/questions/2148149/how-to-sum-a-large-number-of-float-number*/ + kahany=r.weight-visit->kahanc; + kahant=visit->totalweight + kahany; + visit->kahanc=(kahant - visit->totalweight) - kahany; + visit->totalweight=kahant; #ifdef MMC_USE_SSE const float int_coef_arr[4] = { -1.f, -1.f, -1.f, 1.f }; diff --git a/mmc/trunk/src/tettracing.h b/mmc/trunk/src/tettracing.h index 5fb5400e..dc72694e 100644 --- a/mmc/trunk/src/tettracing.h +++ b/mmc/trunk/src/tettracing.h @@ -71,7 +71,8 @@ typedef struct MMC_visitor{ int reclen; float *partialpath; void *photonseed; - float accumu_weight; + float totalweight; + float kahanc; } visitor; #ifdef __cplusplus