#!/bin/bash
# -------------------------------------------------------------------------
# Author       : Brandon J. Fisel
# Date         : 2011/12/13
# Description  : Script for recursive submission of RACM simulations.
# Instructions : CASE_NAME value is set to the name of the CCSM batch job
#                located after PBS -N inside garnet.run. The CONTINUE flag
#                may be set to TRUE to resubmit the simulation or FALSE to
#                end after 1 successful run. RESUBMIT value should be set
#                to the number of times the model will be resubmitted + 1.
#                A RESUBMIT value of 2 will only resubmit the model once.
#                STOP_N value is the number of times the model will
#                resubmit itself upon a model error - this value may be
#                adjusted when you know the model crashes at random,
#                otherwise left at 1. rfilename and sfilename create files
#                that are used as a counter to prevent infinite
#                resubmits - the names may be changed. When running with
#                CONTINUE="TRUE" you should check that CONTINUE_RUN value
#                is set to TRUE in env_run.xml. Before running
#                batch_submit.sh you should check that rfilename and
#                sfilename files are removed from the current directory.
#                Before running batch_submit.sh the first time you should
#                'chmod +x batch_submit.sh'. batch_submit.sh assumes the
#                machine you are running on is Garnet. Run batch_submit.sh
#                in the background using SCREEN.
#
# Flow         : 1. Refuse to start if a counter file already holds the
#                   configured limit, or if CONTINUE is TRUE while
#                   env_run.xml has CONTINUE_RUN set to FALSE.
#                2. Submit $CASE_NAME.garnet.run with qsub and poll qstat
#                   every 600 seconds until the job is no longer listed.
#                3. Inspect $CASE_NAME.o$JOBID: on "Model did not complete"
#                   bump the counter in sfilename, otherwise bump the
#                   counter in rfilename, then re-source this script to
#                   submit again until a limit is hit.
#
# Configure here.
CASE_NAME="r27RB_x1"
CONTINUE="TRUE"
RESUBMIT=12
STOP_N=10
rfilename="./rvalue"
sfilename="./svalue"
#
# Below does not need to be configured.
# -------------------------------------------------------------------------

# Stop the whole run by sending SIGINT to the top-level shell ($$).
# The script re-sources itself for every resubmission, so this is what
# ends every nesting level at once. Kept as a signal (not `exit`) so the
# termination behaviour seen by the caller is unchanged.
stop_run() {
  kill -SIGINT $$
}

# Print the job ids (text before the first "." of each qstat line) of all
# queue entries whose line mentions both $USER_NAME and $CASE_NAME.
list_case_jobs() {
  qstat | grep -- "$USER_NAME" | grep -- "$CASE_NAME" \
    | awk -F "." '{print $1}'
}

# Succeed while job id $1 is still among the ids from list_case_jobs.
# An empty or failing qstat listing counts as "not in queue".
job_in_queue() {
  list_case_jobs \
    | awk -v id="$1" '$0 == id { found = 1 } END { exit !found }'
}

# Check if rvalue, svalue already exists.
# A counter file that contains the configured limit means a previous
# chain of submissions already ran to its end.
if [[ -r "$rfilename" ]]; then
  if grep -- "$RESUBMIT" "$rfilename"; then
    echo "rvalue ALREADY EXISTS, EXITING..."
    stop_run
  fi
fi

if [[ -r "$sfilename" ]]; then
  if grep -- "$STOP_N" "$sfilename"; then
    echo "svalue ALREADY EXISTS, EXITING..."
    stop_run
  fi
fi

# Check continue run RACM status
if [[ -r "env_run.xml" ]]; then
  if [[ "$CONTINUE" == "TRUE" ]]; then
    if grep "entry id=\"CONTINUE_RUN\" value=\"FALSE\"" "env_run.xml"; then
      echo "RESTART MISMATCH, EXITING..."
      stop_run
    fi
  fi
fi

# Submit job to queue
echo "SUBMITTING JOB..."
if ! submit_output=$(qsub "$CASE_NAME.garnet.run"); then
  # Without a submitted job there is nothing to wait for; the original
  # would have slept forever here.
  echo "QSUB FAILED, EXITING..."
  stop_run
fi
# qsub's output used to go straight to the terminal; keep it visible.
echo "$submit_output"

# Get job id from queue
echo "GETTING JOBID..."
USER_NAME=$(whoami)
export USER_NAME
# Prefer the id reported by qsub itself: it identifies exactly the job
# submitted above even when other jobs of the same case are queued.
# NOTE(review): assumes qsub prints "<number>.<server>" — confirm on the
# target machine; any other shape falls back to the qstat lookup below.
JOBID=${submit_output%%.*}
if [[ ! "$JOBID" =~ ^[0-9]+$ ]]; then
  JOBID=$(list_case_jobs)
fi
if [[ -z "$JOBID" ]]; then
  echo "COULD NOT DETERMINE JOBID, EXITING..."
  stop_run
fi
echo "JOBID IS -- $JOBID"

# Wait for job to be removed from queue
echo "WAITING FOR JOB COMPLETION..."
while job_in_queue "$JOBID"; do
  sleep 600
done
# The loop ends as soon as the job is no longer listed, so the
# completion checks below are actually reached.
echo "JOBID HAS COMPLETED"

# Check that job out file exists and job completion status
job_output="$CASE_NAME.o$JOBID"
if [[ -r "$job_output" ]]; then
  n1="Model did not complete"
  if grep -- "$n1" "$job_output"; then
    # Model error: count it in sfilename and retry up to STOP_N times.
    COUNTS=0
    touch "$sfilename"
    # The counter file holds a single "COUNTS=<n>" assignment written
    # below; sourcing it restores the count from the previous pass.
    . "$sfilename"
    COUNTS=$(( COUNTS + 1 ))
    if (( COUNTS > STOP_N )); then
      echo "JOB EXCEEDED STOP_N, EXITING..."
      stop_run
    else
      echo "COUNTS=$COUNTS" > "$sfilename"
      # Re-source this script to submit the next run.
      . ./batch_submit.sh
    fi
  else
    if [[ "$CONTINUE" == "FALSE" ]]; then
      echo "JOB COMPLETED, EXITING..."
      stop_run
    else
      # Successful run: count it in rfilename and continue the simulation
      # until RESUBMIT is reached.
      COUNTR=0
      touch "$rfilename"
      # Same scheme as above, with a "COUNTR=<n>" assignment.
      . "$rfilename"
      COUNTR=$(( COUNTR + 1 ))
      if (( COUNTR > RESUBMIT )); then
        echo "JOB EXCEEDED RESUBMIT, EXITING..."
        stop_run
      else
        echo "COUNTR=$COUNTR" > "$rfilename"
        # Re-source this script to submit the next run.
        . ./batch_submit.sh
      fi
    fi
  fi
else
  # The job's output file is missing or unreadable.
  echo "ERROR 1"
  stop_run
fi