EGSnrc C++ class library  Report PIRS-898 (2021)
Iwan Kawrakow, Ernesto Mainegra-Hing, Frederic Tessier, Reid Townson and Blake Walters
egs_run_control.cpp
Go to the documentation of this file.
1 /*
2 ###############################################################################
3 #
4 # EGSnrc egs++ run control
5 # Copyright (C) 2015 National Research Council Canada
6 #
7 # This file is part of EGSnrc.
8 #
9 # EGSnrc is free software: you can redistribute it and/or modify it under
10 # the terms of the GNU Affero General Public License as published by the
11 # Free Software Foundation, either version 3 of the License, or (at your
12 # option) any later version.
13 #
14 # EGSnrc is distributed in the hope that it will be useful, but WITHOUT ANY
15 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16 # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
17 # more details.
18 #
19 # You should have received a copy of the GNU Affero General Public License
20 # along with EGSnrc. If not, see <http://www.gnu.org/licenses/>.
21 #
22 ###############################################################################
23 #
24 # Author: Iwan Kawrakow, 2005
25 #
26 # Contributors: Frederic Tessier
27 # Hubert Ho
28 # Ernesto Mainegra-Hing
29 #
30 ###############################################################################
31 */
32 
33 
39 #include "egs_run_control.h"
40 #include "egs_application.h"
41 #include "egs_input.h"
42 #include "egs_functions.h"
43 #include "egs_library.h"
44 
45 #include <vector>
46 #include <ctime>
47 #include <cstdio>
48 
49 using namespace std;
50 
51 vector<EGS_Library *> rc_libs;
52 static int n_run_controls = 0;
53 
55  geomErrorMax(0), app(a), input(0), ncase(0), ndone(0), maxt(-1), accu(-1),
56  nbatch(10), restart(0), nchunk(1), cpu_time(0), previous_cpu_time(0),
57  rco_type(simple) {
58  n_run_controls++;
59  if (!app) egsFatal("EGS_RunControl::EGS_RunControl: it is not allowed\n"
60  " to construct a run control object on a NULL application\n");
61  input = app->getInput();
62  if (!input) {
63  egsWarning("EGS_RunControl::EGS_RunControl: the application has no"
64  " input\n");
65  return;
66  }
67  input = input->takeInputItem("run control");
68  if (!input) {
69  egsWarning("EGS_RunControl::EGS_RunControl: no 'run control' "
70  "input\n");
71  return;
72  }
73  double ncase_double;
74  int err = input->getInput("number of histories", ncase_double);
75  if (err) {
76  err = input->getInput("ncase", ncase_double);
77  if (err)
78  egsWarning("EGS_RunControl: missing/wrong 'ncase' or "
79  "'number of histories' input\n");
80  }
81  ncase = EGS_I64(ncase_double);
82  /*****************************************************
83  * Split histories into different parallel jobs.
84  * For the JCFO ncase reset to total as it is handled
85  * via the lock file mechanism dispatching smaller
86  * of histories chunks.
87  *****************************************************/
88  if (app->getNparallel()) {
89  ncase /= app->getNparallel();
90  }
91  err = input->getInput("nbatch",nbatch);
92  if (err) {
93  nbatch = 10;
94  }
95  err = input->getInput("max cpu hours allowed",maxt);
96  if (err) {
97  maxt = -1;
98  }
99  err = input->getInput("statistical accuracy sought",accu);
100  if (err) {
101  accu = -1;
102  }
103  err = input->getInput("geometry error limit", geomErrorMax);
104  if (err) {
105  geomErrorMax = 0;
106  }
107 
108  vector<string> ctype;
109  ctype.push_back("first");
110  ctype.push_back("restart");
111  ctype.push_back("analyze");
112  ctype.push_back("combine");
113  restart = input->getInput("calculation",ctype,0);
114 }
115 
117  if (input) {
118  delete input;
119  }
120  n_run_controls--;
121  if (!n_run_controls) {
122  while (rc_libs.size() > 0) {
123  delete rc_libs[rc_libs.size()-1];
124  rc_libs.pop_back();
125  }
126  }
127 }
128 
129 void EGS_RunControl::describeRCO() {
131  "Run Control Object (RCO):\n"
132  "=========================\n");
133  switch (rco_type) {
134  case simple:
135  egsInformation(" type = simple\n");
136  break;
137  case balanced:
138  egsInformation(" type = balanced (JCF)\n");
139  break;
140  case uniform:
141  egsInformation(" type = uniform\n");
142  break;
143  }
144 }
145 
146 bool EGS_RunControl::storeState(ostream &data) {
147  if (!egsStoreI64(data,ndone)) {
148  return false;
149  }
150  data << " " << (cpu_time+previous_cpu_time) << endl;
151  return data.good();
152 }
153 
154 bool EGS_RunControl::setState(istream &data) {
155  EGS_I64 ndone1;
156  if (!egsGetI64(data,ndone1)) {
157  return false;
158  }
159  ndone += ndone1;
160  ncase += ndone1;
161  data >> previous_cpu_time;
162  return data.good();
163 }
164 
165 bool EGS_RunControl::addState(istream &data) {
166  EGS_Float previous_cpu_time_save = previous_cpu_time;
167  if (!setState(data)) {
168  return false;
169  }
170  previous_cpu_time += previous_cpu_time_save;
171  return true;
172 }
173 
174 void EGS_RunControl::resetCounter() {
175  previous_cpu_time = 0;
176  cpu_time = 0;
177  timer.start();
178  ncase = 0;
179  ndone = 0;
180 }
181 
183  if (restart == 1 || restart == 2) {
184  if (app->readData()) {
185  return -1;
186  }
187  if (restart == 2) {
188  ncase = ndone;
189  egsInformation("\n\nResult analysis only\n\n");
190  return 1;
191  }
192  }
193  else if (restart == 3) {
194  app->describeSimulation();
195  egsInformation("\n\nCombine results only\n\n");
196  egsInformation("calling combineResults()\n");
197  int err = app->combineResults();
198  ncase = ndone;
199  return err ? -1 : 2;
200  }
201  app->describeSimulation();
202  time_t tinfo = time(0);
203  egsInformation("\n\nStarting simulation on %s\n",
204  asctime(localtime(&tinfo)));
205  if (restart == 0) {
206  egsInformation(" Fresh simulation of %lld histories\n\n\n",ncase);
207  }
208  else {
209  egsInformation(" Restarted simulation with %lld old and %lld"
210  " new histories\n\n\n",ndone,ncase-ndone);
211  }
212  timer.start();
213  return 0;
214 }
215 
216 bool EGS_RunControl::startBatch(int ibatch, EGS_I64 ncase_per_batch) {
217  if (!ibatch) egsInformation(
218  " Batch CPU time Result Uncertainty(%c)\n"
219  "==========================================================\n",'%');
220  if (maxt > 0 && ndone > 0) {
221  EGS_Float time_per_shower = (cpu_time + previous_cpu_time)/ndone;
222  EGS_Float extra_time = time_per_shower*ncase_per_batch;
223  if (cpu_time + extra_time > maxt*3600) {
224  egsWarning("\n\n*** Not enough time to finish another batch\n"
225  " => terminating simulation.\n\n");
226  return false;
227  }
228  }
229  egsInformation("%7d",ibatch+1);
230  ndone += ncase_per_batch;
231  return true;
232 }
233 
235  cpu_time = timer.time();
236  int out = app->outputData();
237  if (out) {
238  egsWarning("\n\noutputData() returned error code %d ?\n",out);
239  }
240  double sum, sum2, norm, count;
241  app->getCurrentResult(sum,sum2,norm,count);
242  double f, df;
243  if (sum > 0 && sum2 > 0 && norm > 0 && count > 1) {
244  f = sum*norm/count;
245  df = count*sum2/(sum*sum)-1;
246  if (df > 0) {
247  df = 100*sqrt(df/(count-1));
248  }
249  else {
250  df = 100;
251  }
252  }
253  else {
254  f = 0;
255  df = 100;
256  }
257  egsInformation(" %12.2f %14g %14.2f\n",cpu_time,f,df);
258  if (df < 100 && accu > 0 && df < accu) {
259  char c = '%';
260  egsWarning("\n\n*** Reached the requested uncertainty of %g%c\n"
261  " => terminating simulation.\n\n",accu,c);
262  return false;
263  }
264  return true;
265 }
266 
267 EGS_UniformRunControl::EGS_UniformRunControl(EGS_Application *a) :
268  EGS_RunControl(a), njob(0), npar(app->getNparallel()),
269  ipar(app->getIparallel()), ifirst(app->getFirstParallel()),
270  milliseconds(1000), check_intervals(5), check_egsdat(true),
271  watcher_job(false) {
272 
273  rco_type = uniform;
274 
275  if (input) {
276 
277  /*Change waiting time to check for parallel run completion*/
278  int dummy;
279  int err = input->getInput("interval wait time", dummy);
280  if (!err) {
281  milliseconds = dummy;
282  }
283 
284  /*Change how many times to check for parallel run completion*/
285  err = input->getInput("number of intervals", dummy);
286  if (!err) {
287  check_intervals = dummy;
288  }
289 
290  /* Define watcher jobs to check for parallel run completion*/
291  vector<int> w_jobs;
292  err = input->getInput("watcher jobs", w_jobs);
293  if (!err) {
294  for (int i = 0; i < w_jobs.size(); i++) {
295  if (ipar == w_jobs[i]) {
296  watcher_job = true;
297  break;
298  }
299  }
300  }
301  else { // use defaults
302  /* last job is watcher job */
303  if (ipar == ifirst + npar - 1) {
304  watcher_job = true;
305  }
306  else {
307  watcher_job = false;
308  }
309  }
310 
311  /* Request checking parallel run completion */
312  vector<string> check_options;
313  check_options.push_back("yes");
314  check_options.push_back("no");
315  int ichk = input->getInput("check jobs completed",check_options,0);
316  if (ichk != 0) {
317  check_egsdat = false; // true by default
318  }
319 
320  }
321  else { // use defaults if no RCO input found
322  /* last job is watcher job */
323  if (ipar == ifirst + npar - 1) {
324  watcher_job = true;
325  }
326  }
327 }
328 
329 int EGS_UniformRunControl::startSimulation() {
330 
331 
332  /* Check run completion based on *egsdat files requires erasing
333  existing files from previous runs.
334  */
335  if (check_egsdat) {
336  char buf[512];
337  sprintf(buf,"%s_w%d.egsdat",app->getFinalOutputFile().c_str(), ipar);
338  string datFile = egsJoinPath(app->getAppDir(),buf);
339  if (remove(datFile.c_str()) == 0) {
340  egsWarning("EGS_UniformRunControl: %s deleted\n",
341  datFile.c_str());
342  }
343  }
344 
346 }
347 
348 void EGS_UniformRunControl::describeRCO() {
349 
350  EGS_RunControl::describeRCO();
351 
352  if (watcher_job) {
353  if (check_egsdat) {
355  " Watcher job: remains running after completion checking\n"
356  " for other jobs finishing every %d s for %d s!\n",
357  milliseconds/1000, check_intervals*milliseconds/1000);
358  }
359  else {
361  " Option to check for finishing jobs is OFF!\n\n");
362  }
363  }
364 
365 }
366 
367 #ifdef WIN32
368 
369  #include <io.h>
370  #include <stdio.h>
371  #include <fcntl.h>
372  #include <sys/types.h>
373  #include <sys/stat.h>
374  #include <sys/locking.h>
375  #include <windows.h>
376 
377  #define OPEN_FILE _open
378  #define CLOSE_FILE _close
379  #define CREATE_FLAGS _O_CREAT | _O_EXCL | _O_RDWR, _S_IREAD | _S_IWRITE
380  #define OPEN_FLAGS _O_RDWR,_S_IREAD | _S_IWRITE
381  #define WAIT_FOR_FILE _sleep(1000)
382  #define WRITE_FILE _write
383  #define READ_FILE _read
384 
385 #else
386 
387  #include <unistd.h>
388  #include <fcntl.h>
389  #include <sys/types.h>
390  #include <sys/stat.h>
391  #include <errno.h>
392  #include <string.h>
393  #include <stdio.h>
394 
395  #define OPEN_FILE open
396  #define CLOSE_FILE close
397  #define CREATE_FLAGS O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR
398  #define OPEN_FLAGS O_RDWR
399  #define WAIT_FOR_FILE sleep(1)
400  #define WRITE_FILE write
401  #define READ_FILE read
402 
403 #endif
404 
405 #ifndef SKIP_DOXYGEN
406 
411 class EGS_LOCAL EGS_FileLocking {
412 public:
413  int fd;
414  bool is_locked;
415  int ntry;
416 #ifndef WIN32
417  struct flock fl_write, fl_unlock;
418 #endif
419  EGS_FileLocking() : fd(-1), is_locked(false), ntry(15) {
420 #ifndef WIN32
421  fl_write.l_type = F_WRLCK;
422  fl_write.l_whence = SEEK_SET;
423  fl_write.l_start = 0;
424  fl_write.l_len = 0;
425  fl_unlock.l_type = F_UNLCK;
426  fl_unlock.l_whence = SEEK_SET;
427  fl_unlock.l_start = 0;
428  fl_unlock.l_len = 0;
429 #endif
430  };
431  ~EGS_FileLocking() {
432  if (fd > 0) {
433  CLOSE_FILE(fd);
434  }
435  };
436  bool createControlFile(const char *fname) {
437  is_locked = false;
438  if (fd > 0) {
439  CLOSE_FILE(fd);
440  }
441  fd = OPEN_FILE(fname,CREATE_FLAGS);
442  egsWarning("createControlFile: file=%s fd=%d\n",fname,fd);
443  if (fd < 0) {
444  egsWarning("createControlFile(): open failed! (fd=%d)\n",fd);
445 #ifndef WIN32
446  perror("System error was");
447 #endif
448  }
449  return lockControlFile();
450  };
451  bool openControlFile(const char *fname) {
452  is_locked = false;
453  if (fd > 0) {
454  CLOSE_FILE(fd);
455  }
456  for (int t=0; t<ntry; t++) {
457  fd = OPEN_FILE(fname,OPEN_FLAGS);
458  if (fd > 0) {
459  break;
460  }
461  WAIT_FOR_FILE;
462  }
463  return (fd > 0);
464  };
465  bool closeControlFile() {
466  if (fd > 0) {
467  int res = CLOSE_FILE(fd);
468  fd = -1;
469  return !res;
470  }
471  return true;
472  };
473  bool lockControlFile() {
474  if (is_locked) {
475  return true;
476  }
477  if (fd < 0) {
478  return false;
479  }
480 #ifdef WIN32
481  long np = _lseek(fd,0L,SEEK_SET);
482  if (np) {
483  egsWarning("lockControlFile: _lseek returned %d?\n",np);
484  return false;
485  }
486  int res = _locking(fd,_LK_LOCK,1000000L);
487  if (!res) {
488  is_locked = true;
489  return true;
490  }
491  return false;
492 #else
493  for (int i1=0; i1<5; i1++) {
494  for (int i2=0; i2<12; i2++) {
495  int res = fcntl(fd,F_SETLK,&fl_write);
496  if (!res) {
497  is_locked = true;
498  return true;
499  }
500  WAIT_FOR_FILE ;
501  }
502  egsWarning("lockControlFile: failed to lock file for "
503  "12 seconds...\n");
504  }
505  return false;
506 #endif
507  };
508  bool unlockControlFile() {
509  if (!is_locked) {
510  return true;
511  }
512  if (fd < 0) {
513  return false;
514  }
515 #ifdef WIN32
516  int np = _lseek(fd,0L,SEEK_SET);
517  if (np) {
518  egsWarning("unlockControlFile: _lseek returned %d?\n",np);
519  return false;
520  }
521  int res = _locking(fd,_LK_UNLCK,1000000L);
522 #else
523  int res = fcntl(fd,F_SETLKW,&fl_unlock);
524 #endif
525  if (!res) {
526  is_locked = false;
527  return true;
528  }
529  return false;
530  };
531  bool rewindControlFile() {
532  if (fd < 0) {
533  return false;
534  }
535  if (!is_locked) {
536  if (!lockControlFile()) {
537  return false;
538  }
539  }
540 #ifdef WIN32
541  return !_lseek(fd,0,SEEK_SET);
542 #else
543  return !lseek(fd,0,SEEK_SET);
544 #endif
545  };
546 };
547 
548 #endif
549 
550 EGS_JCFControl::EGS_JCFControl(EGS_Application *a, int Nbuf) :
551  EGS_RunControl(a), tsum(0), tsum2(0), tcount(0), norm(1), last_sum(0),
552  last_sum2(0), last_count(0), njob(0), npar(app->getNparallel()),
553  ipar(app->getIparallel()), ifirst(app->getFirstParallel()),
554  first_time(true), removed_jcf(false), nbuf(Nbuf), p(new EGS_FileLocking) {
555 
556  rco_type = balanced;
557 
558  /* Recover initial number of histories */
559  if (npar) {
560  ncase *= npar;
561  }
562 
563  if (input) {
564  int err = input->getInput("nchunk",nchunk);
565  if (err) {
566  nchunk = 10;
567  }
568  }
569  else {
570  nchunk = 10;
571  }
572  if (nbuf < 0) {
573  nbuf = 1024;
574  }
575  buf = new char [nbuf];
576  nleft = ncase;
577  ntot = 0;
578  //egsInformation("EGS_JCFControl::EGS_JCFControl:\n");
579  //egsInformation(" ncase = %lld nleft = %lld nchunk = %d\n",
580  // nleft,ncase,nchunk);
581 }
582 
583 bool EGS_JCFControl::createControlFile() {
584  string cfile = egsJoinPath(app->getAppDir(),app->getFinalOutputFile());
585  cfile += ".lock";
586  if (!p->createControlFile(cfile.c_str())) {
587  egsWarning("EGS_JCFControl: failed to create or lock the "
588  " job control file %s\n\n",cfile.c_str());
589  return false;
590  }
591  if (p->fd < 0) {
592  return false;
593  }
594  writeControlString();
595  int nwant = strlen(buf)+1;
596  int nwrite = WRITE_FILE(p->fd,buf,nwant);
597  if (nwrite != nwant) {
598  return false;
599  }
600  return p->unlockControlFile();
601 }
602 
603 bool EGS_JCFControl::openControlFile() {
604  string cfile = egsJoinPath(app->getAppDir(),app->getFinalOutputFile());
605  cfile += ".lock";
606  if (!p->openControlFile(cfile.c_str())) {
607  egsWarning("EGS_JCFControl: failed to open the "
608  " job control file %s\n\n",cfile.c_str());
609  return false;
610  }
611  return true;
612 }
613 
614 #ifdef NO_SSTREAM
615  #include <strstream>
616  #define MY_OSTREAM std::ostrstream
617  #define MY_ISTREAM std::istrstream
618 #else
619  #include <sstream>
620  #define MY_OSTREAM std::ostringstream
621  #define MY_ISTREAM std::istringstream
622 #endif
623 
624 bool EGS_JCFControl::writeControlString() {
625  //if( first_time ) { start_time = time(0); first_time = false; }
626  if (first_time) {
627  start_time = time(0);
628  }
629  /*
630  MY_OSTREAM data(buf);
631  //ostream &data = cout;
632  if( !egsStoreI64(data,ntot) ) return false;
633  if( !egsStoreI64(data,nleft) ) return false;
634  data << " " << njob << " " << tsum << " " << tsum2 << " " << tcount << " ";
635  double f = tsum*norm, df;
636  if( tsum > 0 && tsum2 > 0 && norm > 0 && tcount > 1 ) {
637  df = tcount*tsum2/(tsum*tsum)-1;
638  if( df > 0 ) df = 100*sqrt(df/(tcount-1)); else df = 100;
639  } else df = 100;
640  data << f << " " << df << " " << start_time << endl;
641  if( f > 0 && df < 100 ) egsInformation("\nCombined result from all "
642  "parallel jobs: %g +/- %g%%\n\n",f,df);
643  egsInformation("EGS_JCFControl::writeControlString: <%s>\n",buf);
644  return data.good();
645  */
646  double f = tsum*norm, df;
647  if (tsum > 0 && tsum2 > 0 && norm > 0 && tcount > 1) {
648  f = tsum*norm/tcount;
649  df = tcount*tsum2/(tsum*tsum)-1;
650  if (df > 0) {
651  df = 100*sqrt(df/(tcount-1));
652  }
653  else {
654  df = 100;
655  }
656  }
657  else {
658  df = 100;
659  }
660  sprintf(buf,"%lld %lld %d %lg %lg %lg %lg %lg %ld ",ntot,nleft,njob,tsum,
661  tsum2,tcount,f,df,start_time);
662  return true;
663 }
664 
665 bool EGS_JCFControl::getCombinedResult(double &f, double &df) const {
666  if (tsum > 0 && tsum2 > 0 && norm > 0 && tcount > 1) {
667  f = tsum*norm/tcount;
668  df = tcount*tsum2/(tsum*tsum)-1;
669  if (df > 0) {
670  df = 100*sqrt(df/(tcount-1));
671  }
672  else {
673  df = 100;
674  }
675  return true;
676  }
677  df = 100;
678  f = 0;
679  return false;
680 }
681 
682 bool EGS_JCFControl::readControlString() {
683  /*
684  MY_ISTREAM data(buf);
685  if( !egsGetI64(data,ntot) ) return false;
686  if( !egsGetI64(data,nleft) ) return false;
687  double f,df;
688  data >> njob >> tsum >> tsum2 >> tcount >> f >> df >> start_time;
689  return data.good();
690  */
691  double f,df;
692  int res = sscanf(buf,"%lld %lld %d %lg %lg %lg %lg %lg %ld",
693  &ntot,&nleft,&njob,&tsum,&tsum2,&tcount,&f,&df,&start_time);
694  if (res == EOF || res != 9) {
695  return false;
696  }
697  return true;
698 }
699 
700 int EGS_JCFControl::startSimulation() {
702  if (res) {
703  return res;
704  }
705  bool ok = (ipar == ifirst) ? createControlFile() : openControlFile();
706  if (ok) {
707  egsInformation(" Parallel run with %d jobs and %d chunks per "
708  "job\n\n\n",npar,nchunk);
709  return 0;
710  }
711  return -99;
712 }
713 
714 bool EGS_JCFControl::readControlFile() {
715  if (!p->rewindControlFile()) {
716  egsWarning("EGS_JCFControl: failed to rewind the job control file\n");
717  return false;
718  }
719  int res = READ_FILE(p->fd,buf,nbuf-1);
720  if (res <= 0) {
721  p->unlockControlFile();
722  egsWarning("EGS_JCFControl: failed to read the job control file\n");
723  return false;
724  }
725  buf[res] = 0;
726  if (!readControlString()) {
727  p->unlockControlFile();
728  egsWarning("EGS_JCFControl: failed to read from the control string"
729  " <%s>\n",buf);
730  return false;
731  }
732  return true;
733 }
734 
735 bool EGS_JCFControl::writeControlFile() {
736  if (!writeControlString()) {
737  egsWarning("EGS_JCFControl::writeControlFile: failed to write to the "
738  "control string\n");
739  return false;
740  }
741  if (!p->rewindControlFile()) {
742  egsWarning("EGS_JCFControl: failed to rewind the job control file\n");
743  return false;
744  }
745  int nwant = strlen(buf)+1;
746  int nwrite = WRITE_FILE(p->fd,buf,nwant);
747  if (!p->unlockControlFile()) {
748  egsWarning("EGS_JCFControl::writeControlFile: failed to unlock the "
749  "control file\n");
750  return false;
751  }
752  if (nwrite != nwant) {
753  egsWarning("EGS_JCFControl::getNextChunk: could write only %d "
754  "instead of %d chars to the job control file?\n",nwrite,nwant);
755  return false;
756  }
757  return true;
758 }
759 
760 EGS_I64 EGS_JCFControl::getNextChunk() {
761  if (!readControlFile()) {
762  return -1;
763  }
764  if (first_time) {
765  first_time = false;
766  njob++;
767  }
768  double sum, sum2, count;
769  app->getCurrentResult(sum,sum2,norm,count);
770  tsum += sum - last_sum;
771  tsum2 += sum2 - last_sum2;
772  tcount += count - last_count;
773  last_sum = sum;
774  last_sum2 = sum2;
775  last_count = count;
776  EGS_I64 nrun = ncase/(npar*nchunk);
777  if (nrun < 1) {
778  nrun = 1;
779  }
780  if (nrun > nleft) {
781  nrun = nleft;
782  }
783  if (nrun > 0) {
784  app->setSimulationChunk(ntot,nrun);
785  }
786  nleft -= nrun;
787  ntot += nrun;
788  writeControlFile();
789  double f,df;
790  if (accu > 0 && getCombinedResult(f,df)) {
791  if (df < 100 && df < accu) {
792  char c = '%';
793  egsWarning("\n\n*** After combining the results of all parallel "
794  "jobs the requested\n uncertainty of %g%c was reached: %g%c\n"
795  " => terminating simulation.\n\n",accu,c,df,c);
796  return 0;
797  }
798  }
799  return nrun;
800 }
801 
/*
 * Suspends execution for mscnds milliseconds. Values <= 0 return
 * immediately.
 *
 * On non-Windows systems the wait is split into whole seconds (sleep)
 * and the remaining milliseconds (usleep). This keeps the usleep argument
 * below one second and avoids overflowing the int product mscnds*1000
 * for long waits.
 */
void rco_sleep(const int &mscnds) {
    if (mscnds <= 0) {
        return;
    }
#ifdef WIN32
    Sleep(mscnds);
#else
    const int whole_seconds = mscnds/1000;
    const int rest_mscnds = mscnds%1000;
    if (whole_seconds > 0) {
        sleep(whole_seconds);
    }
    if (rest_mscnds > 0) {
        usleep(rest_mscnds*1000);
    }
#endif
}
822 
824  cpu_time = timer.time();
825  egsInformation("\n\nFinished simulation\n\n");
826  egsInformation("%-40s%.2f (sec.) %.4f(hours)\n",
827  "Total cpu time for this run:",cpu_time,cpu_time/3600);
828  //egsInformation("Total cpu time for this run: %g seconds (%g hours)\n\n",
829  // cpu_time, cpu_time/3600);
830  if (previous_cpu_time > 0)
831  egsInformation("%-40s%.2f (sec.) %.4f (hours)\n",
832  "CPU time including previous runs:",cpu_time+previous_cpu_time,
833  (cpu_time+previous_cpu_time)/3600);
834  egsInformation("%-40s%-14g\n","Histories per hour:",3600.*ndone/
835  (cpu_time+previous_cpu_time));
836  egsInformation("%-40s%-14lld\n","Number of random numbers used:",
837  app->randomNumbersUsed());
838  double ch_steps, all_steps;
839  app->getElectronSteps(ch_steps,all_steps);
840  egsInformation("%-40s%-14g\n","Number of electron CH steps:",
841  ch_steps);
842  //egsInformation("%-40s%14g\n","Number of all electron steps:",
843  // all_steps);
844  egsInformation("%-40s","Number of all electron steps:");
845  egsInformation("%-14g\n",all_steps);
846 
847  int n_par = app->getNparallel(),
848  i_par = app->getIparallel(),
849  i_first = app->getFirstParallel();
850  /* If parallel run and last job, trigger the app combineResults method */
851  return (n_par && i_par == i_first + n_par - 1) ? 1 : 0;
852 }
853 
856  if (err < 0) {
857  return err;
858  }
859  /* Check and wait for all jobs to finish */
860  if (watcher_job) {
861  int interval = 0, njobs_done = 0, njobs_done_old= 0;
862  while (interval < check_intervals) {
863  rco_sleep(milliseconds);
864  if (check_egsdat) {
865  njobs_done = app->howManyJobsDone();
866  //egsInformation("\n-> Finished %d jobs...\n",njobs_done);
867  if (njobs_done == npar - 1) {
868  watcher_job=false;//don't enter this after all jobs done!
869  break;
870  }
871  // Only combine if new jobs finished
872  if (njobs_done_old < njobs_done) {
873  egsInformation("=> Combining %d jobs ...\n",njobs_done);
874  app->combinePartialResults();
875  }
876  njobs_done_old = njobs_done;
877  }
878  interval++;
879  }
880  return 1;
881  }
882  /*I am not a watcher job, do not combine results yet!*/
883  return 0;
884 }
885 
886 int EGS_JCFControl::finishSimulation() {
888  if (err < 0) {
889  return err;
890  }
891  if (removed_jcf) {
892  return 0;
893  }
894  if (!readControlFile()) {
895  return -2;
896  }
897  njob--;
898  writeControlFile();
899  p->closeControlFile();
900  if (njob > 0 || removed_jcf) {
901  return 0;
902  }
903  string cfile = egsJoinPath(app->getAppDir(),app->getFinalOutputFile());
904  cfile += ".lock";
905 #ifdef WIN32
906  int res = _unlink(cfile.c_str());
907 #else
908  int res = unlink(cfile.c_str());
909 #endif
910  if (res) egsWarning("EGS_JCFControl::finishSimulation: failed to remove "
911  " the job control file %s\n",cfile.c_str());
912  removed_jcf = true;
913  return 1;
914 }
915 
916 EGS_JCFControl::~EGS_JCFControl() {
917  delete p;
918 }
919 
920 bool EGS_JCFControl::closeControlFile() {
921  return p->closeControlFile();
922 }
923 
924 bool EGS_JCFControl::lockControlFile() {
925  return p->lockControlFile();
926 }
927 
928 bool EGS_JCFControl::unlockControlFile() {
929  return p->unlockControlFile();
930 }
931 
932 bool EGS_JCFControl::rewindControlFile() {
933  return p->rewindControlFile();
934 }
935 
936 typedef EGS_RunControl *(*EGS_RunControlCreationFunction)(EGS_Application *);
937 
938 EGS_RunControl *EGS_RunControl::getRunControlObject(EGS_Application *a) {
939  if (!a) {
940  egsWarning("EGS_RunControl::getRunControlObject(): "
941  "null application?\n");
942  return 0;
943  }
944  EGS_Input *inp = a->getInput();
945  EGS_Input *irc = 0;
946  if (inp) {
947  irc = inp->getInputItem("run control");
948  }
949  /* If no input file, defaults to simple RCO for single runs and
950  to JCF RCO for parallel runs.
951  */
952  if (!irc) {
953  /*
954  egsWarning("EGS_RunControl::getRunControlObject(): "
955  "the application does not have any input\n");
956  return 0;
957  */
958  if (a->getNparallel() > 0) {
959  return new EGS_JCFControl(a);
960  }
961  else {
962  return new EGS_RunControl(a);
963  }
964  }
965  /*
966  EGS_Input *irc = inp->getInputItem("run control");
967  if( !irc ) {
968  egsWarning("EGS_RunControl::getRunControlObject(): "
969  "the application input has no 'run control' item\n");
970  return 0;
971  }
972  */
973  string libname;
974  int err = irc->getInput("library",libname);
975  EGS_RunControl *result;
976  if (!err) {
977  EGS_Library *lib = 0;
978  for (unsigned int j=0; j<rc_libs.size(); j++) {
979  if (libname == rc_libs[j]->libraryName()) {
980  lib = rc_libs[j];
981  break;
982  }
983  }
984  if (!lib) {
985  string dsodir = egsJoinPath("egs++","dso");
986  dsodir = egsJoinPath(dsodir,CONFIG_NAME);
987  dsodir = egsJoinPath(a->getHenHouse(),dsodir);
988  lib = new EGS_Library(libname.c_str(),dsodir.c_str());
989  lib->load();
990  if (!lib->isLoaded()) {
991  egsWarning("EGS_RunControl::getRunControlObject: failed to"
992  " load the library %s from %s\n",libname.c_str(),
993  dsodir.c_str());
994  delete irc;
995  return 0;
996  }
997  rc_libs.push_back(lib);
998  }
999  EGS_RunControlCreationFunction create =
1000  (EGS_RunControlCreationFunction) lib->resolve("createRunControl");
1001  if (!create) {
1002  egsWarning("EGS_RunControl::getRunControlObject: failed to"
1003  " resolve the run control creation function of library %s\n",
1004  libname.c_str());
1005  result = 0;
1006  }
1007  else {
1008  result = create(a);
1009  }
1010  }
1011  else {
1012  if (a->getNparallel() > 0) {
1013  vector<string> allowed_types;
1014  allowed_types.push_back("simple");
1015  allowed_types.push_back("uniform");
1016  allowed_types.push_back("balanced");
1017  int rco_t = irc->getInput("rco type",allowed_types,2);
1018  switch (rco_t) {
1019  case 0:
1020  result = new EGS_RunControl(a);
1021  break;
1022  case 1:
1023  result = new EGS_UniformRunControl(a);
1024  break;
1025  case 2:
1026  result = new EGS_JCFControl(a);
1027  break;
1028  default:
1029  result = new EGS_JCFControl(a);
1030  }
1031  }
1032  else {
1033  result = new EGS_RunControl(a);
1034  }
1035  }
1036  delete irc;
1037  return result;
1038 }
const string & getFinalOutputFile() const
Returns the base name of the final output file(s)
virtual int startSimulation()
Starts the simulation.
A class for dynamically loading shared libraries.
Definition: egs_library.h:52
virtual int readData()
Read intermediate results.
bool EGS_EXPORT egsGetI64(istream &data, EGS_I64 &n)
Reads a 64 bit integer from the stream data and assigns it to n. Returns true on success, false on failure.
virtual void getElectronSteps(double &ch_steps, double &all_steps) const
Get the number of electron steps taken.
A simple run control object for advanced EGSnrc C++ applications.
virtual void describeSimulation()
Describe the simulation.
EGS_Float time()
Returns the CPU time in seconds since start() was called.
Definition: egs_timer.cpp:106
virtual int combineResults()
Combine results from parallel runs.
virtual ~EGS_RunControl()
Destructor.
EGS_Input class header file.
bool EGS_EXPORT egsStoreI64(ostream &data, EGS_I64 n)
Writes the 64 bit integer n to the output stream data and returns true on success, false on failure.
virtual int combinePartialResults()
Combine intermediate results from parallel runs.
string egsJoinPath(const string &first, const string &second)
Join two path variables (or a path and a file name) using the platform specific directory separator a...
int getIparallel() const
Returns the job number in a parallel run.
Global egspp functions header file.
virtual void setSimulationChunk(EGS_I64 nstart, EGS_I64 nrun)
Set the simulation chunk.
int getNparallel() const
Returns the number of parallel jobs executing.
RCOType rco_type
RCO type to use.
parallel jobs with the same number of histories
EGS_InfoFunction EGS_EXPORT egsFatal
Always use this function for reporting fatal errors.
virtual void getCurrentResult(double &sum, double &sum2, double &norm, double &count)
Report the current result.
EGS_Input * getInputItem(const string &key) const
Same as the previous function but now ownership remains with the EGS_Input object.
Definition: egs_input.cpp:245
virtual bool finishBatch()
Finish a batch.
EGS_InfoFunction EGS_EXPORT egsInformation
Always use this function for reporting the progress of a simulation and any other type of information...
EGS_Library class header file.
virtual bool startBatch(int, EGS_I64)
Start a new batch.
virtual int outputData()
Output intermediate results.
parallel jobs with balanced load via JCF
single job or multiple independent jobs
int getFirstParallel() const
Returns the first job number in a parallel run.
virtual EGS_I64 randomNumbersUsed() const
Returns the number of random numbers used.
void start()
Starts the time measurement.
Definition: egs_timer.cpp:102
bool load()
Loads the library.
const string & getHenHouse() const
Returns the HEN_HOUSE directory.
void rco_sleep(const int &mscnds)
Suspend execution for a given time (in ms)
A job control object for homogeneous computing environments (HCE).
A class for storing information in a tree-like structure of key-value pairs. This class is used throu...
Definition: egs_input.h:182
int finishSimulation()
Uses 'watcher' jobs to determine if the simulation has finished.
EGS_Input * getInput()
Returns a pointer to the EGS_Input object containing the user input to the application found in the i...
const string & getAppDir() const
Returns the absolute path to the user code directory.
A 'job control file' (JCF) RCO.
EGS_Input * takeInputItem(const string &key, bool self=true)
Get the property named key.
Definition: egs_input.cpp:226
EGS_Application class header file.
virtual int finishSimulation()
Finish the simulation.
EGS_RunControl(EGS_Application *app)
Creates an RCO for the application app.
int getInput(const string &key, vector< string > &values) const
Assign values to an array of strings from an input identified by key.
Definition: egs_input.cpp:338
Base class for advanced EGSnrc C++ applications.
EGS_RunControl and EGS_JCFControl class header file.
int howManyJobsDone()
Counts how many *.egsdat files in app folder.
EGS_InfoFunction EGS_EXPORT egsWarning
Always use this function for reporting warnings.