MAG ecflow job:
1. admin ( /ecfmag/ecfnets/scripts/admin/mag )
JMAG_CKQUOTA @ wcoss (mag_ckquota.ecf @ ecf1)
export job="jmag_ckquota"
export pid=$$
export DATA=${DATAROOT}/${job}.${pid}
rm -rf $DATA
mkdir -p $DATA
cd $DATA
$SCRIPTSmag/exmag_checkquota.sh.ecf
If run on gyre :
$ /usr/lpp/mmfs/bin/mmlsquota -j nco-sib gpfs-gd2
If run on tide :
$ /usr/lpp/mmfs/bin/mmlsquota -j nco-sib gpfs-td2
cd /tmpnwprd1
rm -rf $DATA
log file : /nco/sib/magdev/com/output/prod/today/mag_ckquota.o%J
JMAG_CLEANUP @ wcoss (mag_cleanup.ecf @ ecf1)
export job="jmag_cleanup"
export pid=$$
export DATA=${DATAROOT}/${job}.${pid}
rm -rf $DATA
mkdir -p $DATA
cd $DATA
#1) clean up wcoss
$SCRIPTSmag/exmag_cleanup.sh.ecf
# Clean up GIF files : /nco/sib/magdev/com/mag/test/gifs
find ./${i} -mindepth 1 -maxdepth 1 -mmin +1560 -type d
find ./${i} -mindepth 1 -maxdepth 1 -mmin +1560 -type d -exec rm -rf '{}' ';'
# Clean up status files : /nco/sib/magdev/com/mag/test/status
find ${LOG_DIR} -mindepth 1 -maxdepth 1 -mtime +3 -type d
find ${LOG_DIR} -mindepth 1 -maxdepth 1 -mtime +3 -type d -exec rm -rf '{}' ';'
# Removing log files : /nco/sib/magdev/com/output/prod/today
find ${LOG_DIR} -mindepth 1 -maxdepth 1 -mtime +3 -type d
find ${LOG_DIR} -mindepth 1 -maxdepth 1 -mtime +3 -type d -exec rm -rf '{}' ';'
# Clean up tmp dirs : /nco/sib/magdev/tmpnwprd1
find ${TMP_DIR} -mindepth 1 -maxdepth 1 -mtime +3 -type d
find ${TMP_DIR} -mindepth 1 -maxdepth 1 -mtime +3 -type d -exec rm -rf '{}' ';'
#2) clean up rzdm
/usr/bin/ssh -t -t mag@ncorzdm.ncep.noaa.gov "bash /home/people/nco/mag/bin/exmag_cleanup_test.sh.ecf"
cd /tmpnwprd1
rm -rf $DATA
log file : /nco/sib/magdev/com/output/prod/today/mag_cleanup.o%J
JMAG_SETUP @ wcoss (ecmag_maintain.ecf @ ecf1)
Purpose : Create a dated sub-directory (e.g. 20150915) and point the symbolic link "today" at the current day's directory.
$ cd /nco/sib/magdev/com/output/prod
$ ln -vTsf $out_dir/prod/$PDY $out_dir/prod/today
ln [OPTION]... [-T] TARGET LINK_NAME
-v : verbose, print name of each linked file
-T : treat LINK_NAME as a normal file
-s : make symbolic links instead of hard links
-f : force, remove destination files
export DATA=${NWROOT}/tmpnwprd1/JMAG_SETUP.${pid}
mkdir -p $DATA
cd $DATA
env
${NWROOT}/mag.${mag_ver}/scripts/exmag_setup.sh.ecf
${NWROOT}/mag.${mag_ver}/scripts/exMagDirClean.sh.ecf
cd ${NWROOT}/tmpnwprd1
rm -rf $DATA
exit
Log file : /nco/sib/magdev/com/output/prod/today/ecmag_maintain.o%J
2. mag processor
( /ecfmag/ecfnets/scripts/mag/mag_processor/${model}/mag_${model}_processor.ecf @ ecf1)
JMAG @ wcoss
export job=${job:-jmag.${MODEL}}
export pid=$$
export DATA=${DATAROOT}/${job}.${pid}
rm -rf $DATA
mkdir -p $DATA
cd $DATA
export jlogfile=${jlogfile:-$COMROOT/logs/jlogfiles/jlogfile.${job}.${pid}}
if [ "$hurr_model" == "yes" ]; then
mag_script=exmag_processor_hurr.pl
else
mag_script=exmag_processor.pl
fi
${SCRIPTSmag}/${mag_script} ${MAGPROCPL_FLAGS}
msg="JOB $job ENDED NORMALLY."
postmsg $jlogfile "$msg"
rm -rf $DATA
date
Eg: /ecfmag/ecfnets/scripts/mag/mag_processor/gfs/mag_gfs_processor.ecf
export MODEL=gfs
export MP_CMDFILE=poe_script
export HOMEmag=$NWROOT/mag.$mag_ver
$HOMEmag/jobs/JMAG
3. send2web
JSNDMAG2WEB
export job=${job:-jmag.${MODEL}}
export pid=$$
export DATA=${DATAROOT}/${job}.${pid}
rm -rf $DATA
mkdir -p $DATA
cd $DATA
export jlogfile=${jlogfile:-$COMROOT/logs/jlogfiles/jlogfile.${job}.${pid}}
$SCRIPTSmag/exsendmag2web.sh.ecf $MODEL $TABLEDIR/$TABLE $transfer_file $yyyymmdd $cycle
if [ "${MAG_TRANSFER}" = "ON" ]; then # If this is not running in development...
$HOMEmag/scripts/exmag_status_sync.sh.ecf
fi
cd ${BASE_DIR}/tmpnwprd1
rm -rf $DATA
Eg: /ecfmag/ecfnets/scripts/mag/mag_send2web/gfs/ecmag_sync_gfs.ecf
export MODEL=GFS
export TABLE=MAG_sync_table.tbl
export ncorzdm_username=mag;
export HOMEmag=$NWROOT/mag.$mag_ver;
$HOMEmag/jobs/JSNDMAG2WEB
rsync in exsendmag2web.sh.ecf :
/usr/bin/rsync -v -v --rsh="/usr/bin/ssh -c arcfour128,aes128-cbc,arcfour,aes128-ctr,blowfish-cbc -o macs=hmac-md5,hmac-md5-96 -o compression=yes" --stats --timeout=30 --update /nco/sib/magdev/com/mag/test/gifs/gfs/20151001/12/{allow.cfg,index.php} mag@ncorzdm:/home/www/nco_mag/test_data/gfs/20151001/12
rsync flag description:
-v : increase verbosity, the more -v given, the more info will be printed.
--rsh : specify remote shell to use.
--stats : give some file-transfer statistics
--update : force rsync to skip any files which exist on the destination and have a modified time newer than the source file.
/nco/sib/magdev/nwtest/mag.trunk_726/scripts/exsendmag2web.sh.ecf
GFS
/nco/sib/magdev/nwtest/mag.trunk_726/fix/MAG_sync_table.tbl
/nco/sib/magdev/com/mag/test/status/transfer/gfs_2015100112_69_133_153.transfer
20151001
12
How to switch the host that ecflow submits jobs to?
$ ssh -Y Deyong.Xu@cpecflow1.ncep.noaa.gov
$ ecflowview &
from the ecFlow viewer:
halt (right-click on MAG, choose halt)
wait for jobs to bleed off and go to 4. Or proceed with c and d
set to complete (right-click on each, choose set complete):
ecmaintain
rsync_requeue
mag_cleanup
Kill all the jobs on tide / gyre as the mag.dev account.
$ sudo -u mag.dev -i
$ bjobs -u mag.dev
$ bjobs -u mag.dev | grep devhigh | awk '{print "bkill", $1}'
(Now, cut and paste the results of step iii. to actually kill the jobs)
$ sudo -u mag.dev -i ( sudo to mag.dev account on cpecflow1 server )
$ vi /home/mag.dev/etc/prodmachinefile and switch the order of the wcoss host names (the development machine should be the first one)
from the ecFlow viewer:
restart (right-click on MAG, choose restart)
check each task, verify that it is queued, and that the next time to run is after the current time (click on task, click on “i” icon on upper right bar, choose tab “Why”, and examine TASK line)
How to make the hidden transfer file visible and trigger the processing event in ecflow?
-----------------------------------------
“exmag_processor.pl”
-----------------------------------------
$transfer_file_hidden="${Config::status_dir}/transfer/.${model}_${date}${cycle}_${job}_${main::transfer_seq}_${random}.transfer";
if (! open ($trans_filehandle, "> $transfer_file_hidden")) {
logmsg($Config::fatal, "Error opening transfer file: $transfer_file_hidden: $!");
return 1;
}
logmsg($Config::info, "Poe job completed successfully, so make the transfer file ${transfer_file} visible:");
if (! rename $transfer_file_hidden, $transfer_file) {
logmsg($Config::fatal, "Error renaming the hidden transfer file to visible name:
${transfer_file}");
return 1;
}
if (! $main::development_mode) { # when not running at the command prompt
# dxu : set the event “processing” to be true in the corresponding model *.ecf .
`ecflow_client --event processing`;
}
Event and Trigger
Event:
The event keyword assigns an event to the task currently being defined. Only tasks can have events and they can be considered as an attribute of a task. There can be many events and they are displayed as nodes.
An event has a number and possibly a name. If it is only defined as a number, its name is the text representation of the number without leading zeroes. For example, event 007 can be accessed as event 7 or as event 007. Event's numbers must be positive and their name can contain only letters and digits. The use of letters is optional; the event name can consist simply of digits.
eg:
task x
event 1 # Can only be referred to as x:1
event 2 prepok # Can only be referred to as x:prepok
event 3 99 # This is asking for trouble!
real MAG example:
family ghm
task mag_ghm_full_processor
event 1 processing # dxu: 1 is useless ; processing = false
time 00:02 23:52 00:10
Trigger:
The purpose of an event is to signal partial completion of a task and to be able to trigger another job which is waiting for this partial completion.
eg:
suite x
family f
task t1
event foo
task t2
trigger t1:foo == set # foo = set ; foo=true
#t1:foo is relative path.
Three steps to combine event and trigger :
1) Assign an event to a task.
task create_a_file
event event_A # event name : event_A , event_A = false .
2) Modify the task to change the event value to true.
In task create_a_file,
ecflow_client --event event_A # command "ecflow_client --event" sets
# event_A = true
3) Task copy_a_file checks value of event_A of task create_a_file and will run if event_A = true.
task copy_a_file
trigger create_a_file: event_A # check event_A’s value.
ecflow system: cpecflow1.ncep.noaa.gov
/ecfmag/ecfnets/defs
1. Directory structure:
(note : use “admin” for demo. )
admin.def
Purpose:
set ENV variables using keyword edit.
set cron schedule.
suite admin
repeat day 1
edit ECF_HOME '/ecfmag/ecfnets/scripts'
edit ECF_INCLUDE '/ecfmag/ecfnets/scripts'
edit ECF_OUT '/ecfmag/ecfnets/output'
edit ECF_TRIES '1'
edit ECF_FILES '/ecfmag/ecfnets/scripts'
edit ECF_JOB_CMD '%ECF_JOB% 1> %ECF_JOBOUT% 2>&1'
family mag
task mag_requeue
cron 00:14 23:49 00:10
## task rsync_requeue
## time 00:14 23:49 00:10
task mag_cleanup
edit ECF_JOB_CMD '/ecfmag/ecfutils/unixsubmit %ECF_JOB% %ECF_JOBOUT% ibmsp'
edit ECF_PASS 'FREE'
time 00:14
task ecmag_maintain
edit ECF_JOB_CMD '/ecfmag/ecfutils/unixsubmit %ECF_JOB% %ECF_JOBOUT% ibmsp'
edit ECF_PASS 'FREE'
time 00:07
task mag_ckquota
edit ECF_JOB_CMD '/ecfmag/ecfutils/unixsubmit %ECF_JOB% %ECF_JOBOUT% ibmsp'
edit ECF_PASS 'FREE'
cron 00:05 23:49 01:00
endfamily
endsuite
mag_requeue.ecf
…..
# Requeue every ecFlow node that node_status.py reports as complete, then
# mark this task itself complete or aborted.
#
# Keep only the lines of node_status.py output that contain "complete" and
# store them in ${magout}/mag_list.
python ${pythondir}/node_status.py | grep complete >${magout}/mag_list
# Loop over the first field of each saved line; that field is passed to
# ecflow_client as the node to requeue.
for model in `cat ${magout}/mag_list | awk -F" " '{print $1}'`
do
echo
echo "JOB IS COMPLETE - REQUEUE-ING TASK: $model"
echo
ecflow_client --requeue force $model
# Remember the exit status of the requeue call just made.
export err=$?
if [ $err -ne '0' ]
then
# Report the failure to ecFlow with a reason string. There is no break/exit
# here, so the loop goes on to the next entry in the list.
ecflow_client --abort="ecflow_client --force FAILED: $model NOT requeued"
fi
done
# -s is true only when the list file exists and is non-empty, so the summary
# is printed only if at least one line matched "complete".
if [ -s ${magout}/mag_list ]
then
echo
echo "THE FOLLOWING HAVE BEEN REQUEUED:"
cat ${magout}/mag_list
echo
fi
# The list file is only scratch data; remove it.
rm -f ${magout}/mag_list
date
# NOTE(review): err is overwritten on every loop iteration, so this check sees
# only the status of the LAST requeue attempt; an earlier failure followed by a
# success ends in --complete. Also, when mag_list is empty the loop never runs
# and err is not set anywhere in the lines shown here (it may be initialised in
# the elided part of the script - confirm); with err unset this test fails and
# the else branch (--abort) is taken.
if test $err -eq '0'
then
ecflow_client --complete
else
ecflow_client --abort
fi