([iter],[num-grps],[task],[qsub])
 ;
 ; SOURCE:  spider/docs/techs/recon1/Procs/pub-submit.spi     New  Jan 2016
 ;
 ; PURPOSE: Submit one PBS job per parallel group through 'qsub', then block
 ;          until every group has produced its completion flag file.
 ;
 ; INPUT REGISTERS:
 ;   [iter]                Current iteration
 ;   [num-grps]            Number of parallel groups
 ;   [task]                Task selector forwarded to: pub-refine-start
 ;   [qsub]                Queueing mode (1 == PBS, 2 == PBS with memory limit)
 ;
 ; INPUT STRINGS:
 ;   ? PBS script ?        [run1]              Script to be run by PBS
 ;
 ; OUTPUT FILES:  ('????' denotes sync number, and '***' denotes group)
 ;   [finished_file]       jnk_sync_????_***   Signals when subscriber job finishes  (one/group) Deleted
 ;   [wait_file]           jnk_wait_????_***   Signals when subscriber finishes copy (one/group) Deleted
 ;
 ; PROCEDURES CALLED:
 ;    qsub                     qsub.pbs
 ;    .. pub-refine-start      pub-refine-start
 ;    ..... refine-settings    refine-settings.spi
 ;    ..... refine-loop   OR   refine-loop.spi   OR
 ;    ..... refine-smangloop   refine-smangloop.spi
 ;
 ; NOTE(review): [wait_file], [num-angs], [incore-yn] and [local-yn] are used
 ;               below but never assigned in this procedure; presumably they
 ;               are set elsewhere -- confirm.
 ;
 ; -------------------------------- END BATCH HEADER ----------------------------

 ; Name template of the per-group completion flag file.
 ; It references [rn] and [grp], both of which are assigned further below.
 [finished_file] = 'jnk_sync_{****[rn]}_{***[grp]}'

 ; Argument string appended to the script name on every submission
 [run2] = ' {%I0%[grp]} task={%I0%[task]} iter={%I0%[iter]} grp={%I0%[grp]} rn={****[rn]} pubsub=1'

 ; Name of the script that PBS should run
 FR
   ? PBS script ? [run1]

 ; Random number used to make this call's sync file names semi-unique
 [rn] = int(ran(0.0)*9999)

 ; ----------------------- Submission: one job per group -----------------------

 DO [grp] = 1,[num-grps]

   ; Usually starts: pub-refine-start, which runs: run1 (depends on task).
   ; NOTE(review): for any [qsub] other than 1 or 2 neither branch below is
   ;               taken, so nothing is submitted although the wait loop at
   ;               the end still runs -- confirm callers only pass 1 or 2.

   IF ( [qsub] == 1 ) THEN
      ; Plain PBS submission
      SYS
        qsub -v run="[run1] [run2]" qsub.pbs

   ELSE IF ( [qsub] == 2 ) THEN
      ; PBS submission with a memory request; estimate is returned in [memMb]
      @memforqsub([task],[grp],[num-angs],[incore-yn],[local-yn],[memMb])

      SYS
        qsub -l mem={%i0%[memMb]}mb -v run="[run1] [run2]" qsub.pbs
   ENDIF

   ; Delay for the compute node's copy to local disk.
   ; [max-wait] is fixed at zero, so the block guarded by it never executes.
   [max-wait] = 0

   IF ( [max-wait] > 0 ) THEN
      ; Wait for the copy flag file, then delete it (minimizes competition)
      IQ SYNC DEL [waited]
        [wait_file]          ; Flag file   (input)
        10 [max-wait]        ; Re-check interval, maximum wait

      SYS
        echo ' For local copy of group: {%I3%[grp]} Waited: {%I5%[waited]} seconds'
   ENDIF

 ENDDO

 ; -------------------- Synchronization: wait for every group ------------------
 ; (Previously in: pub-refine-wait)

 SYS
   echo -n " Waiting for: [run1] Iteration: {%I0%[iter]} Task={%I0%[task]} " ; date '+ TIME: %x %X' ; echo

 MY FL                       ; Flush results file

 DO [grp] = 1,[num-grps]

   ; Disabled per-group progress message, kept as in the original:
   !SYS
   !echo " Waiting for group {%I0%[grp]} ([finished_file])"

   ; Block until this group's completion file exists, then delete it
   IQ SYNC DEL
     [finished_file]         ; Completion file   (deleted)
     10 3600000              ; Re-check interval, maximum wait

   SYS
     echo -n " Finished group: {%I0%[grp]} --- "; date '+ TIME: %x %X'

   MY FL                     ; Flush results file

 ENDDO

 RE
 ;