EGSnrc C++ class library  Report PIRS-898 (2021)
Iwan Kawrakow, Ernesto Mainegra-Hing, Frederic Tessier, Reid Townson and Blake Walters
egs_run_control.cpp
Go to the documentation of this file.
1 /*
2 ###############################################################################
3 #
4 # EGSnrc egs++ run control
5 # Copyright (C) 2015 National Research Council Canada
6 #
7 # This file is part of EGSnrc.
8 #
9 # EGSnrc is free software: you can redistribute it and/or modify it under
10 # the terms of the GNU Affero General Public License as published by the
11 # Free Software Foundation, either version 3 of the License, or (at your
12 # option) any later version.
13 #
14 # EGSnrc is distributed in the hope that it will be useful, but WITHOUT ANY
15 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16 # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
17 # more details.
18 #
19 # You should have received a copy of the GNU Affero General Public License
20 # along with EGSnrc. If not, see <http://www.gnu.org/licenses/>.
21 #
22 ###############################################################################
23 #
24 # Author: Iwan Kawrakow, 2005
25 #
26 # Contributors: Frederic Tessier
27 # Hubert Ho
28 # Ernesto Mainegra-Hing
29 # Blake Walters
30 # Marc Chamberland
31 # Reid Townson
32 #
33 ###############################################################################
34 */
35 
36 
42 #include "egs_run_control.h"
43 #include "egs_application.h"
44 #include "egs_input.h"
45 #include "egs_functions.h"
46 #include "egs_library.h"
47 
48 #include <vector>
49 #include <ctime>
50 #include <cstdio>
51 
52 using namespace std;
53 
54 vector<EGS_Library *> rc_libs;
55 static int n_run_controls = 0;
56 
58  geomErrorMax(0), app(a), input(0), ncase(0), ndone(0), maxt(-1), accu(-1),
59  nbatch(10), restart(0), nchunk(1), cpu_time(0), previous_cpu_time(0),
60  rco_type(simple) {
61  n_run_controls++;
62  if (!app) egsFatal("EGS_RunControl::EGS_RunControl: it is not allowed\n"
63  " to construct a run control object on a NULL application\n");
64  input = app->getInput();
65  if (!input) {
66  egsWarning("EGS_RunControl::EGS_RunControl: the application has no"
67  " input\n");
68  return;
69  }
70  input = input->takeInputItem("run control");
71  if (!input) {
72  egsWarning("EGS_RunControl::EGS_RunControl: no 'run control' "
73  "input\n");
74  return;
75  }
76  double ncase_double;
77  int err = input->getInput("number of histories", ncase_double);
78  if (err) {
79  err = input->getInput("ncase", ncase_double);
80  if (err)
81  egsWarning("EGS_RunControl: missing/wrong 'ncase' or "
82  "'number of histories' input\n");
83  }
84  ncase = EGS_I64(ncase_double);
85  /*****************************************************
86  * Split histories into different parallel jobs.
87  * For the JCFO ncase reset to total as it is handled
88  * via the lock file mechanism dispatching smaller
89  * of histories chunks.
90  *****************************************************/
91  if (app->getNparallel()) {
92  ncase /= app->getNparallel();
93  }
94  err = input->getInput("nbatch",nbatch);
95  if (err) {
96  nbatch = 10;
97  }
98  err = input->getInput("max cpu hours allowed",maxt);
99  if (err) {
100  maxt = -1;
101  }
102  err = input->getInput("statistical accuracy sought",accu);
103  if (err) {
104  accu = -1;
105  }
106  err = input->getInput("geometry error limit", geomErrorMax);
107  if (err) {
108  geomErrorMax = 0;
109  }
110 
111  vector<string> ctype;
112  ctype.push_back("first");
113  ctype.push_back("restart");
114  ctype.push_back("analyze");
115  ctype.push_back("combine");
116  restart = input->getInput("calculation",ctype,0);
117 }
118 
120  if (input) {
121  delete input;
122  }
123  n_run_controls--;
124  if (!n_run_controls) {
125  while (rc_libs.size() > 0) {
126  delete rc_libs[rc_libs.size()-1];
127  rc_libs.pop_back();
128  }
129  }
130 }
131 
132 void EGS_RunControl::describeRCO() {
134  "Run Control Object (RCO):\n"
135  "=========================\n");
136  switch (rco_type) {
137  case simple:
138  egsInformation(" type = simple\n");
139  break;
140  case balanced:
141  egsInformation(" type = balanced (JCF)\n");
142  break;
143  case uniform:
144  egsInformation(" type = uniform\n");
145  break;
146  }
147 }
148 
149 bool EGS_RunControl::storeState(ostream &data) {
150  if (!egsStoreI64(data,ndone)) {
151  return false;
152  }
153  data << " " << (cpu_time+previous_cpu_time) << endl;
154  return data.good();
155 }
156 
157 bool EGS_RunControl::setState(istream &data) {
158  EGS_I64 ndone1;
159  if (!egsGetI64(data,ndone1)) {
160  return false;
161  }
162  ndone += ndone1;
163  ncase += ndone1;
164  data >> previous_cpu_time;
165  return data.good();
166 }
167 
168 bool EGS_RunControl::addState(istream &data) {
169  EGS_Float previous_cpu_time_save = previous_cpu_time;
170  if (!setState(data)) {
171  return false;
172  }
173  previous_cpu_time += previous_cpu_time_save;
174  return true;
175 }
176 
177 void EGS_RunControl::resetCounter() {
178  previous_cpu_time = 0;
179  cpu_time = 0;
180  timer.start();
181  ncase = 0;
182  ndone = 0;
183 }
184 
186  if (restart == 1 || restart == 2) {
187  if (app->readData()) {
188  return -1;
189  }
190  if (restart == 2) {
191  ncase = ndone;
192  egsInformation("\n\nResult analysis only\n\n");
193  return 1;
194  }
195  }
196  else if (restart == 3) {
197  app->describeSimulation();
198  egsInformation("\n\nCombine results only\n\n");
199  egsInformation("calling combineResults()\n");
200  int err = app->combineResults();
201  ncase = ndone;
202  return err ? -1 : 2;
203  }
204  app->describeSimulation();
205  time_t tinfo = time(0);
206  egsInformation("\n\nStarting simulation on %s\n",
207  asctime(localtime(&tinfo)));
208  if (restart == 0) {
209  egsInformation(" Fresh simulation of %lld histories\n\n\n",ncase);
210  }
211  else {
212  egsInformation(" Restarted simulation with %lld old and %lld"
213  " new histories\n\n\n",ndone,ncase-ndone);
214  }
215  timer.start();
216  return 0;
217 }
218 
219 bool EGS_RunControl::startBatch(int ibatch, EGS_I64 ncase_per_batch) {
220  if (!ibatch) egsInformation(
221  " Batch CPU time Result Uncertainty(%c)\n"
222  "==========================================================\n",'%');
223  if (maxt > 0 && ndone > 0) {
224  EGS_Float time_per_shower = (cpu_time + previous_cpu_time)/ndone;
225  EGS_Float extra_time = time_per_shower*ncase_per_batch;
226  if (cpu_time + extra_time > maxt*3600) {
227  egsWarning("\n\n*** Not enough time to finish another batch\n"
228  " => terminating simulation.\n\n");
229  return false;
230  }
231  }
232  egsInformation("%7d",ibatch+1);
233  ndone += ncase_per_batch;
234  return true;
235 }
236 
238  cpu_time = timer.time();
239  int out = app->outputData();
240  if (out) {
241  egsWarning("\n\noutputData() returned error code %d ?\n",out);
242  }
243  double sum, sum2, norm, count;
244  app->getCurrentResult(sum,sum2,norm,count);
245  double f, df;
246  if (sum > 0 && sum2 > 0 && norm > 0 && count > 1) {
247  f = sum*norm/count;
248  df = count*sum2/(sum*sum)-1;
249  if (df > 0) {
250  df = 100*sqrt(df/(count-1));
251  }
252  else {
253  df = 100;
254  }
255  }
256  else {
257  f = 0;
258  df = 100;
259  }
260  egsInformation(" %12.2f %14g %14.2f\n",cpu_time,f,df);
261  if (df < 100 && accu > 0 && df < accu) {
262  char c = '%';
263  egsWarning("\n\n*** Reached the requested uncertainty of %g%c\n"
264  " => terminating simulation.\n\n",accu,c);
265  return false;
266  }
267  return true;
268 }
269 
270 EGS_UniformRunControl::EGS_UniformRunControl(EGS_Application *a) :
271  EGS_RunControl(a), njob(0), npar(app->getNparallel()),
272  ipar(app->getIparallel()), ifirst(app->getFirstParallel()),
273  milliseconds(1000), check_intervals(5), check_egsdat(true),
274  watcher_job(false) {
275 
276  rco_type = uniform;
277 
278  if (input) {
279 
280  /*Change waiting time to check for parallel run completion*/
281  int dummy;
282  int err = input->getInput("interval wait time", dummy);
283  if (!err) {
284  milliseconds = dummy;
285  }
286 
287  /*Change how many times to check for parallel run completion*/
288  err = input->getInput("number of intervals", dummy);
289  if (!err) {
290  check_intervals = dummy;
291  }
292 
293  /* Define watcher jobs to check for parallel run completion*/
294  vector<int> w_jobs;
295  err = input->getInput("watcher jobs", w_jobs);
296  if (!err) {
297  for (int i = 0; i < w_jobs.size(); i++) {
298  if (ipar == w_jobs[i]) {
299  watcher_job = true;
300  break;
301  }
302  }
303  }
304  else { // use defaults
305  /* last job is watcher job */
306  if (ipar == ifirst + npar - 1) {
307  watcher_job = true;
308  }
309  else {
310  watcher_job = false;
311  }
312  }
313 
314  /* Request checking parallel run completion */
315  vector<string> check_options;
316  check_options.push_back("yes");
317  check_options.push_back("no");
318  int ichk = input->getInput("check jobs completed",check_options,0);
319  if (ichk != 0) {
320  check_egsdat = false; // true by default
321  }
322 
323  }
324  else { // use defaults if no RCO input found
325  /* last job is watcher job */
326  if (ipar == ifirst + npar - 1) {
327  watcher_job = true;
328  }
329  }
330 }
331 
332 int EGS_UniformRunControl::startSimulation() {
333 
334 
335  /* Check run completion based on *egsdat files requires erasing
336  existing files from previous runs.
337  */
338  if (check_egsdat) {
339  char buf[512];
340  sprintf(buf,"%s_w%d.egsdat",app->getFinalOutputFile().c_str(), ipar);
341  string datFile = egsJoinPath(app->getAppDir(),buf);
342  if (remove(datFile.c_str()) == 0) {
343  egsWarning("EGS_UniformRunControl: %s deleted\n",
344  datFile.c_str());
345  }
346  }
347 
349 }
350 
351 void EGS_UniformRunControl::describeRCO() {
352 
353  EGS_RunControl::describeRCO();
354 
355  if (watcher_job) {
356  if (check_egsdat) {
358  " Watcher job: remains running after completion checking\n"
359  " for other jobs finishing every %d s for %d s!\n",
360  milliseconds/1000, check_intervals*milliseconds/1000);
361  }
362  else {
364  " Option to check for finishing jobs is OFF!\n\n");
365  }
366  }
367 
368 }
369 
370 #ifdef WIN32
371 
372  #include <io.h>
373  #include <stdio.h>
374  #include <fcntl.h>
375  #include <sys/types.h>
376  #include <sys/stat.h>
377  #include <sys/locking.h>
378  #include <windows.h>
379 
380  #define OPEN_FILE _open
381  #define CLOSE_FILE _close
382  #define CREATE_FLAGS _O_CREAT | _O_EXCL | _O_RDWR, _S_IREAD | _S_IWRITE
383  #define OPEN_FLAGS _O_RDWR,_S_IREAD | _S_IWRITE
384  #define WAIT_FOR_FILE Sleep(1000)
385  #define WRITE_FILE _write
386  #define READ_FILE _read
387 
388 #else
389 
390  #include <unistd.h>
391  #include <fcntl.h>
392  #include <sys/types.h>
393  #include <sys/stat.h>
394  #include <errno.h>
395  #include <string.h>
396  #include <stdio.h>
397 
398  #define OPEN_FILE open
399  #define CLOSE_FILE close
400  #define CREATE_FLAGS O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR
401  #define OPEN_FLAGS O_RDWR
402  #define WAIT_FOR_FILE sleep(1)
403  #define WRITE_FILE write
404  #define READ_FILE read
405 
406 #endif
407 
408 #ifndef SKIP_DOXYGEN
409 
414 class EGS_LOCAL EGS_FileLocking {
415 public:
416  int fd;
417  bool is_locked;
418  int ntry;
419 #ifndef WIN32
420  struct flock fl_write, fl_unlock;
421 #endif
422  EGS_FileLocking() : fd(-1), is_locked(false), ntry(15) {
423 #ifndef WIN32
424  fl_write.l_type = F_WRLCK;
425  fl_write.l_whence = SEEK_SET;
426  fl_write.l_start = 0;
427  fl_write.l_len = 0;
428  fl_unlock.l_type = F_UNLCK;
429  fl_unlock.l_whence = SEEK_SET;
430  fl_unlock.l_start = 0;
431  fl_unlock.l_len = 0;
432 #endif
433  };
434  ~EGS_FileLocking() {
435  if (fd > 0) {
436  CLOSE_FILE(fd);
437  }
438  };
439  bool createControlFile(const char *fname) {
440  is_locked = false;
441  if (fd > 0) {
442  CLOSE_FILE(fd);
443  }
444  fd = OPEN_FILE(fname,CREATE_FLAGS);
445  egsWarning("createControlFile: file=%s fd=%d\n",fname,fd);
446  if (fd < 0) {
447  egsWarning("createControlFile(): open failed! (fd=%d)\n",fd);
448 #ifndef WIN32
449  perror("System error was");
450 #endif
451  }
452  return lockControlFile();
453  };
454  bool openControlFile(const char *fname) {
455  is_locked = false;
456  if (fd > 0) {
457  CLOSE_FILE(fd);
458  }
459  for (int t=0; t<ntry; t++) {
460  fd = OPEN_FILE(fname,OPEN_FLAGS);
461  if (fd > 0) {
462  break;
463  }
464  WAIT_FOR_FILE;
465  }
466  return (fd > 0);
467  };
468  bool closeControlFile() {
469  if (fd > 0) {
470  int res = CLOSE_FILE(fd);
471  fd = -1;
472  return !res;
473  }
474  return true;
475  };
476  bool lockControlFile() {
477  if (is_locked) {
478  return true;
479  }
480  if (fd < 0) {
481  return false;
482  }
483 #ifdef WIN32
484  long np = _lseek(fd,0L,SEEK_SET);
485  if (np) {
486  egsWarning("lockControlFile: _lseek returned %d?\n",np);
487  return false;
488  }
489  int res = _locking(fd,_LK_LOCK,1000000L);
490  if (!res) {
491  is_locked = true;
492  return true;
493  }
494  return false;
495 #else
496  for (int i1=0; i1<5; i1++) {
497  for (int i2=0; i2<12; i2++) {
498  int res = fcntl(fd,F_SETLK,&fl_write);
499  if (!res) {
500  is_locked = true;
501  return true;
502  }
503  WAIT_FOR_FILE ;
504  }
505  egsWarning("lockControlFile: failed to lock file for "
506  "12 seconds...\n");
507  }
508  return false;
509 #endif
510  };
511  bool unlockControlFile() {
512  if (!is_locked) {
513  return true;
514  }
515  if (fd < 0) {
516  return false;
517  }
518 #ifdef WIN32
519  int np = _lseek(fd,0L,SEEK_SET);
520  if (np) {
521  egsWarning("unlockControlFile: _lseek returned %d?\n",np);
522  return false;
523  }
524  int res = _locking(fd,_LK_UNLCK,1000000L);
525 #else
526  int res = fcntl(fd,F_SETLKW,&fl_unlock);
527 #endif
528  if (!res) {
529  is_locked = false;
530  return true;
531  }
532  return false;
533  };
534  bool rewindControlFile() {
535  if (fd < 0) {
536  return false;
537  }
538  if (!is_locked) {
539  if (!lockControlFile()) {
540  return false;
541  }
542  }
543 #ifdef WIN32
544  return !_lseek(fd,0,SEEK_SET);
545 #else
546  return !lseek(fd,0,SEEK_SET);
547 #endif
548  };
549 };
550 
551 #endif
552 
553 EGS_JCFControl::EGS_JCFControl(EGS_Application *a, int Nbuf) :
554  EGS_RunControl(a), tsum(0), tsum2(0), tcount(0), norm(1), last_sum(0),
555  last_sum2(0), last_count(0), njob(0), npar(app->getNparallel()),
556  ipar(app->getIparallel()), ifirst(app->getFirstParallel()),
557  first_time(true), removed_jcf(false), nbuf(Nbuf), p(new EGS_FileLocking) {
558 
559  rco_type = balanced;
560 
561  /* Recover initial number of histories */
562  if (npar) {
563  ncase *= npar;
564  }
565 
566  if (input) {
567  int err = input->getInput("nchunk",nchunk);
568  if (err) {
569  nchunk = 10;
570  }
571  }
572  else {
573  nchunk = 10;
574  }
575  if (nbuf < 0) {
576  nbuf = 1024;
577  }
578  buf = new char [nbuf];
579  nleft = ncase;
580  ntot = 0;
581  //egsInformation("EGS_JCFControl::EGS_JCFControl:\n");
582  //egsInformation(" ncase = %lld nleft = %lld nchunk = %d\n",
583  // nleft,ncase,nchunk);
584 }
585 
586 bool EGS_JCFControl::createControlFile() {
587  string cfile = egsJoinPath(app->getAppDir(),app->getFinalOutputFile());
588  cfile += ".lock";
589  if (!p->createControlFile(cfile.c_str())) {
590  egsWarning("EGS_JCFControl: failed to create or lock the "
591  " job control file %s\n\n",cfile.c_str());
592  return false;
593  }
594  if (p->fd < 0) {
595  return false;
596  }
597  writeControlString();
598  int nwant = strlen(buf)+1;
599  int nwrite = WRITE_FILE(p->fd,buf,nwant);
600  if (nwrite != nwant) {
601  return false;
602  }
603  return p->unlockControlFile();
604 }
605 
606 bool EGS_JCFControl::openControlFile() {
607  string cfile = egsJoinPath(app->getAppDir(),app->getFinalOutputFile());
608  cfile += ".lock";
609  if (!p->openControlFile(cfile.c_str())) {
610  egsWarning("EGS_JCFControl: failed to open the "
611  " job control file %s\n\n",cfile.c_str());
612  return false;
613  }
614  return true;
615 }
616 
617 #ifdef NO_SSTREAM
618  #include <strstream>
619  #define MY_OSTREAM std::ostrstream
620  #define MY_ISTREAM std::istrstream
621 #else
622  #include <sstream>
623  #define MY_OSTREAM std::ostringstream
624  #define MY_ISTREAM std::istringstream
625 #endif
626 
627 bool EGS_JCFControl::writeControlString() {
628  //if( first_time ) { start_time = time(0); first_time = false; }
629  if (first_time) {
630  start_time = time(0);
631  }
632  /*
633  MY_OSTREAM data(buf);
634  //ostream &data = cout;
635  if( !egsStoreI64(data,ntot) ) return false;
636  if( !egsStoreI64(data,nleft) ) return false;
637  data << " " << njob << " " << tsum << " " << tsum2 << " " << tcount << " ";
638  double f = tsum*norm, df;
639  if( tsum > 0 && tsum2 > 0 && norm > 0 && tcount > 1 ) {
640  df = tcount*tsum2/(tsum*tsum)-1;
641  if( df > 0 ) df = 100*sqrt(df/(tcount-1)); else df = 100;
642  } else df = 100;
643  data << f << " " << df << " " << start_time << endl;
644  if( f > 0 && df < 100 ) egsInformation("\nCombined result from all "
645  "parallel jobs: %g +/- %g%%\n\n",f,df);
646  egsInformation("EGS_JCFControl::writeControlString: <%s>\n",buf);
647  return data.good();
648  */
649  double f = tsum*norm, df;
650  if (tsum > 0 && tsum2 > 0 && norm > 0 && tcount > 1) {
651  f = tsum*norm/tcount;
652  df = tcount*tsum2/(tsum*tsum)-1;
653  if (df > 0) {
654  df = 100*sqrt(df/(tcount-1));
655  }
656  else {
657  df = 100;
658  }
659  }
660  else {
661  df = 100;
662  }
663  sprintf(buf,"%lld %lld %d %lg %lg %lg %lg %lg %ld ",ntot,nleft,njob,tsum,
664  tsum2,tcount,f,df,start_time);
665  return true;
666 }
667 
668 bool EGS_JCFControl::getCombinedResult(double &f, double &df) const {
669  if (tsum > 0 && tsum2 > 0 && norm > 0 && tcount > 1) {
670  f = tsum*norm/tcount;
671  df = tcount*tsum2/(tsum*tsum)-1;
672  if (df > 0) {
673  df = 100*sqrt(df/(tcount-1));
674  }
675  else {
676  df = 100;
677  }
678  return true;
679  }
680  df = 100;
681  f = 0;
682  return false;
683 }
684 
685 bool EGS_JCFControl::readControlString() {
686  /*
687  MY_ISTREAM data(buf);
688  if( !egsGetI64(data,ntot) ) return false;
689  if( !egsGetI64(data,nleft) ) return false;
690  double f,df;
691  data >> njob >> tsum >> tsum2 >> tcount >> f >> df >> start_time;
692  return data.good();
693  */
694  double f,df;
695  int res = sscanf(buf,"%lld %lld %d %lg %lg %lg %lg %lg %ld",
696  &ntot,&nleft,&njob,&tsum,&tsum2,&tcount,&f,&df,&start_time);
697  if (res == EOF || res != 9) {
698  return false;
699  }
700  return true;
701 }
702 
703 int EGS_JCFControl::startSimulation() {
705  if (res) {
706  return res;
707  }
708  bool ok = (ipar == ifirst) ? createControlFile() : openControlFile();
709  if (ok) {
710  egsInformation(" Parallel run with %d jobs and %d chunks per "
711  "job\n\n\n",npar,nchunk);
712  return 0;
713  }
714  return -99;
715 }
716 
717 bool EGS_JCFControl::readControlFile() {
718  if (!p->rewindControlFile()) {
719  egsWarning("EGS_JCFControl: failed to rewind the job control file\n");
720  return false;
721  }
722  int res = READ_FILE(p->fd,buf,nbuf-1);
723  if (res <= 0) {
724  p->unlockControlFile();
725  egsWarning("EGS_JCFControl: failed to read the job control file\n");
726  return false;
727  }
728  buf[res] = 0;
729  if (!readControlString()) {
730  p->unlockControlFile();
731  egsWarning("EGS_JCFControl: failed to read from the control string"
732  " <%s>\n",buf);
733  return false;
734  }
735  return true;
736 }
737 
738 bool EGS_JCFControl::writeControlFile() {
739  if (!writeControlString()) {
740  egsWarning("EGS_JCFControl::writeControlFile: failed to write to the "
741  "control string\n");
742  return false;
743  }
744  if (!p->rewindControlFile()) {
745  egsWarning("EGS_JCFControl: failed to rewind the job control file\n");
746  return false;
747  }
748  int nwant = strlen(buf)+1;
749  int nwrite = WRITE_FILE(p->fd,buf,nwant);
750  if (!p->unlockControlFile()) {
751  egsWarning("EGS_JCFControl::writeControlFile: failed to unlock the "
752  "control file\n");
753  return false;
754  }
755  if (nwrite != nwant) {
756  egsWarning("EGS_JCFControl::getNextChunk: could write only %d "
757  "instead of %d chars to the job control file?\n",nwrite,nwant);
758  return false;
759  }
760  return true;
761 }
762 
763 EGS_I64 EGS_JCFControl::getNextChunk() {
764  if (!readControlFile()) {
765  return -1;
766  }
767  if (first_time) {
768  first_time = false;
769  njob++;
770  }
771  double sum, sum2, count;
772  app->getCurrentResult(sum,sum2,norm,count);
773  tsum += sum - last_sum;
774  tsum2 += sum2 - last_sum2;
775  tcount += count - last_count;
776  last_sum = sum;
777  last_sum2 = sum2;
778  last_count = count;
779  EGS_I64 nrun = ncase/(npar*nchunk);
780  if (nrun < 1) {
781  nrun = 1;
782  }
783  if (nrun > nleft) {
784  nrun = nleft;
785  }
786  if (nrun > 0) {
787  app->setSimulationChunk(ntot,nrun,npar,nchunk);
788  }
789  nleft -= nrun;
790  ntot += nrun;
791  writeControlFile();
792  double f,df;
793  if (accu > 0 && getCombinedResult(f,df)) {
794  if (df < 100 && df < accu) {
795  char c = '%';
796  egsWarning("\n\n*** After combining the results of all parallel "
797  "jobs the requested\n uncertainty of %g%c was reached: %g%c\n"
798  " => terminating simulation.\n\n",accu,c,df,c);
799  return 0;
800  }
801  }
802  return nrun;
803 }
804 
/*
 * Suspends execution for mscnds milliseconds. Zero and negative values
 * return immediately.
 *
 * In the non-WIN32 branch the time is split into whole seconds, passed to
 * sleep(), and the remaining fraction, passed to usleep(): POSIX only
 * requires usleep() to accept arguments below one million microseconds,
 * and mscnds*1000 could overflow an int for long waits.
 */
void rco_sleep(const int &mscnds) {
    if (mscnds <= 0) {
        return;
    }
#ifdef WIN32
    Sleep(mscnds);
#else
    const int whole_seconds = mscnds / 1000;
    const int rest_ms = mscnds % 1000;
    if (whole_seconds > 0) {
        sleep(whole_seconds);
    }
    if (rest_ms > 0) {
        usleep(rest_ms * 1000);
    }
#endif
}
825 
827  cpu_time = timer.time();
828  egsInformation("\n\nFinished simulation\n\n");
829  egsInformation("%-40s%.2f (sec.) %.4f(hours)\n",
830  "Total cpu time for this run:",cpu_time,cpu_time/3600);
831  //egsInformation("Total cpu time for this run: %g seconds (%g hours)\n\n",
832  // cpu_time, cpu_time/3600);
833  if (previous_cpu_time > 0)
834  egsInformation("%-40s%.2f (sec.) %.4f (hours)\n",
835  "CPU time including previous runs:",cpu_time+previous_cpu_time,
836  (cpu_time+previous_cpu_time)/3600);
837  egsInformation("%-40s%-14g\n","Histories per hour:",3600.*ndone/
838  (cpu_time+previous_cpu_time));
839  egsInformation("%-40s%-14lld\n","Number of random numbers used:",
840  app->randomNumbersUsed());
841  double ch_steps, all_steps;
842  app->getElectronSteps(ch_steps,all_steps);
843  egsInformation("%-40s%-14g\n","Number of electron CH steps:",
844  ch_steps);
845  //egsInformation("%-40s%14g\n","Number of all electron steps:",
846  // all_steps);
847  egsInformation("%-40s","Number of all electron steps:");
848  egsInformation("%-14g\n",all_steps);
849 
850  int n_par = app->getNparallel(),
851  i_par = app->getIparallel(),
852  i_first = app->getFirstParallel();
853  /* If parallel run and last job, trigger the app combineResults method */
854  return (n_par && i_par == i_first + n_par - 1) ? 1 : 0;
855 }
856 
859  if (err < 0) {
860  return err;
861  }
862  /* Check and wait for all jobs to finish */
863  if (watcher_job) {
864  int interval = 0, njobs_done = 0, njobs_done_old= 0;
865  while (interval < check_intervals) {
866  rco_sleep(milliseconds);
867  if (check_egsdat) {
868  njobs_done = app->howManyJobsDone();
869  //egsInformation("\n-> Finished %d jobs...\n",njobs_done);
870  if (njobs_done == npar - 1) {
871  watcher_job=false;//don't enter this after all jobs done!
872  break;
873  }
874  // Only combine if new jobs finished
875  if (njobs_done_old < njobs_done) {
876  egsInformation("=> Combining %d jobs ...\n",njobs_done);
877  app->combinePartialResults();
878  }
879  njobs_done_old = njobs_done;
880  }
881  interval++;
882  }
883  return 1;
884  }
885  /*I am not a watcher job, do not combine results yet!*/
886  return 0;
887 }
888 
889 int EGS_JCFControl::finishSimulation() {
891  if (err < 0) {
892  return err;
893  }
894  if (removed_jcf) {
895  return 0;
896  }
897  if (!readControlFile()) {
898  return -2;
899  }
900  njob--;
901  writeControlFile();
902  p->closeControlFile();
903  if (njob > 0 || removed_jcf) {
904  return 0;
905  }
906  string cfile = egsJoinPath(app->getAppDir(),app->getFinalOutputFile());
907  cfile += ".lock";
908 #ifdef WIN32
909  int res = _unlink(cfile.c_str());
910 #else
911  int res = unlink(cfile.c_str());
912 #endif
913  if (res) egsWarning("EGS_JCFControl::finishSimulation: failed to remove "
914  " the job control file %s\n",cfile.c_str());
915  removed_jcf = true;
916  return 1;
917 }
918 
919 EGS_JCFControl::~EGS_JCFControl() {
920  delete p;
921 }
922 
923 bool EGS_JCFControl::closeControlFile() {
924  return p->closeControlFile();
925 }
926 
927 bool EGS_JCFControl::lockControlFile() {
928  return p->lockControlFile();
929 }
930 
931 bool EGS_JCFControl::unlockControlFile() {
932  return p->unlockControlFile();
933 }
934 
935 bool EGS_JCFControl::rewindControlFile() {
936  return p->rewindControlFile();
937 }
938 
939 typedef EGS_RunControl *(*EGS_RunControlCreationFunction)(EGS_Application *);
940 
941 EGS_RunControl *EGS_RunControl::getRunControlObject(EGS_Application *a) {
942  if (!a) {
943  egsWarning("EGS_RunControl::getRunControlObject(): "
944  "null application?\n");
945  return 0;
946  }
947  EGS_Input *inp = a->getInput();
948  EGS_Input *irc = 0;
949  if (inp) {
950  irc = inp->getInputItem("run control");
951  }
952  /* If no input file, defaults to simple RCO for single runs and
953  to JCF RCO for parallel runs.
954  */
955  if (!irc) {
956  /*
957  egsWarning("EGS_RunControl::getRunControlObject(): "
958  "the application does not have any input\n");
959  return 0;
960  */
961  if (a->getNparallel() > 0) {
962  return new EGS_JCFControl(a);
963  }
964  else {
965  return new EGS_RunControl(a);
966  }
967  }
968  /*
969  EGS_Input *irc = inp->getInputItem("run control");
970  if( !irc ) {
971  egsWarning("EGS_RunControl::getRunControlObject(): "
972  "the application input has no 'run control' item\n");
973  return 0;
974  }
975  */
976  string libname;
977  int err = irc->getInput("library",libname);
978  EGS_RunControl *result;
979  if (!err) {
980  EGS_Library *lib = 0;
981  for (unsigned int j=0; j<rc_libs.size(); j++) {
982  if (libname == rc_libs[j]->libraryName()) {
983  lib = rc_libs[j];
984  break;
985  }
986  }
987  if (!lib) {
988  string dsodir = egsJoinPath("egs++","dso");
989  dsodir = egsJoinPath(dsodir,CONFIG_NAME);
990  dsodir = egsJoinPath(a->getHenHouse(),dsodir);
991  lib = new EGS_Library(libname.c_str(),dsodir.c_str());
992  lib->load();
993  if (!lib->isLoaded()) {
994  egsWarning("EGS_RunControl::getRunControlObject: failed to"
995  " load the library %s from %s\n",libname.c_str(),
996  dsodir.c_str());
997  delete irc;
998  return 0;
999  }
1000  rc_libs.push_back(lib);
1001  }
1002  EGS_RunControlCreationFunction create =
1003  (EGS_RunControlCreationFunction) lib->resolve("createRunControl");
1004  if (!create) {
1005  egsWarning("EGS_RunControl::getRunControlObject: failed to"
1006  " resolve the run control creation function of library %s\n",
1007  libname.c_str());
1008  result = 0;
1009  }
1010  else {
1011  result = create(a);
1012  }
1013  }
1014  else {
1015  if (a->getNparallel() > 0) {
1016  vector<string> allowed_types;
1017  allowed_types.push_back("simple");
1018  allowed_types.push_back("uniform");
1019  allowed_types.push_back("balanced");
1020  int rco_t = irc->getInput("rco type",allowed_types,2);
1021  switch (rco_t) {
1022  case 0:
1023  result = new EGS_RunControl(a);
1024  break;
1025  case 1:
1026  result = new EGS_UniformRunControl(a);
1027  break;
1028  case 2:
1029  result = new EGS_JCFControl(a);
1030  break;
1031  default:
1032  result = new EGS_JCFControl(a);
1033  }
1034  }
1035  else {
1036  result = new EGS_RunControl(a);
1037  }
1038  }
1039  delete irc;
1040  return result;
1041 }
Base class for advanced EGSnrc C++ applications.
virtual EGS_I64 randomNumbersUsed() const
Returns the number of random numbers used.
virtual void describeSimulation()
Describe the simulation.
virtual int outputData()
Output intermediate results.
int getNparallel() const
Returns the number of parallel jobs executing.
EGS_Input * getInput()
Returns a pointer to the EGS_Input object containing the user input to the application found in the i...
virtual void getElectronSteps(double &ch_steps, double &all_steps) const
Get the number of electron steps taken.
virtual void getCurrentResult(double &sum, double &sum2, double &norm, double &count)
Report the current result.
int getFirstParallel() const
Returns the first job number in a parallel run.
int howManyJobsDone()
Counts how many *.egsdat files in app folder.
virtual void setSimulationChunk(EGS_I64 nstart, EGS_I64 nrun, int npar, int nchunk)
Set the simulation chunk.
const string & getFinalOutputFile() const
Returns the base name of the final output file(s)
const string & getAppDir() const
Returns the absolute path to the user code directory.
virtual int combineResults()
Combine results from parallel runs.
const string & getHenHouse() const
Returns the HEN_HOUSE directory.
int getIparallel() const
Returns the job number in a parallel run.
virtual int combinePartialResults()
Combine intermediate results from parallel runs.
virtual int readData()
Read intermediate results.
A class for storing information in a tree-like structure of key-value pairs. This class is used throu...
Definition: egs_input.h:182
EGS_Input * takeInputItem(const string &key, bool self=true)
Get the property named key.
Definition: egs_input.cpp:226
EGS_Input * getInputItem(const string &key) const
Same as the previous function but now ownership remains with the EGS_Input object.
Definition: egs_input.cpp:245
int getInput(const string &key, vector< string > &values) const
Assign values to an array of strings from an input identified by key.
Definition: egs_input.cpp:338
A 'job control file' (JCF) RCO.
A class for dynamically loading shared libraries.
Definition: egs_library.h:52
bool isLoaded() const
Returns true if the library is loaded, false otherwise.
bool load()
Loads the library.
void * resolve(const char *func)
Returns the address of the exported symbol func.
A simple run control object for advanced EGSnrc C++ applications.
RCOType rco_type
RCO type to use.
@ balanced
parallel jobs with balanced load via JCF
@ uniform
parallel jobs with same number of histories
@ simple
single job or multiple independent jobs
EGS_RunControl(EGS_Application *app)
Creates an RCO for the application app.
virtual int startSimulation()
Starts the simulation.
virtual bool startBatch(int, EGS_I64)
Start a new batch.
virtual ~EGS_RunControl()
Destructor.
virtual bool finishBatch()
Finish a batch.
virtual int finishSimulation()
Finish the simulation.
EGS_Float time()
Returns the CPU time in seconds since start() was called.
Definition: egs_timer.cpp:106
void start()
Starts the time measurement.
Definition: egs_timer.cpp:102
A job control object for homogeneous computing environments (HCE).
int finishSimulation()
Uses 'watcher' jobs to determine if the simulation has finished.
EGS_Application class header file.
Global egspp functions header file.
EGS_Input class header file.
EGS_Library class header file.
void rco_sleep(const int &mscnds)
Suspend execution for a given time (in ms)
EGS_RunControl and EGS_JCFControl class header file.
bool EGS_EXPORT egsStoreI64(ostream &data, EGS_I64 n)
Writes the 64 bit integer n to the output stream data and returns true on success,...
EGS_InfoFunction EGS_EXPORT egsInformation
Always use this function for reporting the progress of a simulation and any other type of information...
EGS_InfoFunction EGS_EXPORT egsFatal
Always use this function for reporting fatal errors.
bool EGS_EXPORT egsGetI64(istream &data, EGS_I64 &n)
Reads a 64 bit integer from the stream data and assigns it to n. Returns true on success,...
string egsJoinPath(const string &first, const string &second)
Join two path variables (or a path and a file name) using the platform specific directory separator a...
EGS_InfoFunction EGS_EXPORT egsWarning
Always use this function for reporting warnings.