Using checkpoints for IQ-TREE2
Example 1 with timeout
#!/usr/bin/env bash
#SBATCH --job-name=iqtree
#SBATCH --output=iqtree-%j.out
#SBATCH --error=iqtree-%j.err
#SBATCH --open-mode=append
#SBATCH --time=7-00:00:00
#SBATCH --nodes=1
#SBATCH --cpus-per-task=8
#SBATCH --mem=40G
#SBATCH --requeue

# Runs IQ-TREE2 under a wall-clock cap that ends one hour before the 7-day
# Slurm limit, so the copy step at the bottom still has time to run.
# IQ-TREE writes all of its output, including the checkpoint, to
# $HOME/my_job.* (see --pre below); those files are then copied to
# $NETWORK_DIR.

# Paths
SCRATCH_DIR=${TMPDIR:?TMPDIR is not set; point it at a scratch directory}
NETWORK_DIR=$HOME/iqtree
PREFIX=$HOME/my_job

mkdir -p "$NETWORK_DIR" || exit 1
cd "$SCRATCH_DIR" || exit 1

# The value is reported in MB, so divide by 1024 to get whole GB for -mem.
echo "Allocated memory (MB per node): ${SLURM_MEM_PER_NODE}"
MEM_GB=$(( SLURM_MEM_PER_NODE / 1024 ))
echo "Passing -mem ${MEM_GB}G to IQ-TREE2"

# GNU timeout accepts one number with a single unit suffix (s, m, h or d);
# a compound value such as "6d23h" is rejected as an invalid time interval.
# 6 days + 23 hours = 167 hours.
timeout 167h iqtree2 \
  -s /network/storage/alignment.fasta \
  -m MFP \
  -B 1000 \
  --cptime 300 \
  --pre "$PREFIX" \
  -T "${SLURM_CPUS_PER_TASK}" \
  -mem "${MEM_GB}G"
status=$?

# timeout exits with 124 when it had to stop the command.
if (( status == 124 )); then
  echo "iqtree2 was stopped by the 167h cap; the checkpoint stays at ${PREFIX}.ckp.gz" >&2
elif (( status != 0 )); then
  echo "iqtree2 exited with status ${status}" >&2
fi

# After completion, copy results back. The output lives under $PREFIX, not in
# the scratch directory, so copy those files rather than the scratch contents.
cp -- "$PREFIX".* "$NETWORK_DIR/" || exit 1
exit "$status"
Alternative: periodic copy via signal trap
If you want to keep everything local for speed but copy checkpoints periodically or on preemption:
# Trap Slurm signal
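trap 'cp my_job.ckp.gz /network/storage/checkpoints/' USR1
Combine with --signal=B:USR1@60 in your SBATCH header, which asks Slurm to send USR1 to the batch shell about 60 seconds before the time limit. Bash only runs a trap once the current foreground command has returned, so start iqtree2 in the background and `wait` on it; otherwise the handler does not run until iqtree2 has exited.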
Example 2 with signal
#!/usr/bin/env bash
#SBATCH --job-name=iqtree-job
#SBATCH --output=iqtree-%j.out
#SBATCH --error=iqtree-%j.err
#SBATCH --open-mode=append
#SBATCH --time=7-00:00:00
#SBATCH --nodes=1
#SBATCH --cpus-per-task=8
#SBATCH --mem=40G
#SBATCH --requeue
#SBATCH --signal=B:USR1@60

# Runs IQ-TREE2 inside the scratch directory and keeps its files there while
# it works. The files are copied to $NETWORK_DIR when the run ends or when
# Slurm delivers USR1 to this shell (requested by --signal above); in the
# latter case the job is also requeued. Files saved by an earlier run are
# copied back into scratch first.
# NOTE(review): this relies on IQ-TREE resuming from an existing checkpoint
# when rerun with the same prefix - confirm against the IQ-TREE docs.

# Paths
SCRATCH_DIR=${TMPDIR:?TMPDIR is not set; point it at a scratch directory}
NETWORK_DIR=$HOME/iqtree
PREFIX=my_job   # relative on purpose: files are created in $SCRATCH_DIR

mkdir -p "$NETWORK_DIR" || exit 1
cd "$SCRATCH_DIR" || exit 1

# Without nullglob an unmatched pattern would be handed to cp literally.
shopt -s nullglob

# Copy every "$PREFIX".* file from the scratch directory to $NETWORK_DIR.
# Returns 0 when there is nothing to copy, otherwise the status of cp.
save_outputs() {
  local -a produced=( "$PREFIX".* )
  (( ${#produced[@]} > 0 )) || return 0
  cp -- "${produced[@]}" "$NETWORK_DIR/"
}

# USR1 handler: stop IQ-TREE so the checkpoint is not copied mid-write,
# save the files, and put the job back in the queue.
on_usr1() {
  echo "Signal received: copying checkpoint and requeuing"
  if [[ -n "${iqtree_pid:-}" ]]; then
    # timeout forwards TERM to the command it supervises.
    kill -TERM "$iqtree_pid" 2>/dev/null
    wait "$iqtree_pid" 2>/dev/null
  fi
  save_outputs
  scontrol requeue "$SLURM_JOB_ID"
  exit 0
}

# Trap signal for checkpoint copy and requeue
trap on_usr1 USR1

# Bring back whatever a previous run saved.
saved=( "$NETWORK_DIR/$PREFIX".* )
if (( ${#saved[@]} > 0 )); then
  cp -- "${saved[@]}" . || exit 1
fi

# The value is reported in MB, so divide by 1024 to get whole GB for -mem.
echo "Allocated memory (MB per node): ${SLURM_MEM_PER_NODE}"
MEM_GB=$(( SLURM_MEM_PER_NODE / 1024 ))
echo "Passing -mem ${MEM_GB}G to IQ-TREE2"

# Run IQ-TREE2 with timeout (7 days - 1 hour = 167 hours). GNU timeout takes
# one number with a single unit suffix, so "6d23h" is not a valid duration.
# The command runs in the background and the shell blocks in `wait`: bash
# postpones a trap until the foreground command returns, whereas `wait` is
# interrupted by the signal and the handler runs immediately.
timeout 167h iqtree2 \
  -s "$NETWORK_DIR/alignment.fasta" \
  -m MFP \
  -B 1000 \
  --cptime 300 \
  --pre "$PREFIX" \
  -mem "${MEM_GB}G" \
  -T "${SLURM_CPUS_PER_TASK}" &
iqtree_pid=$!
wait "$iqtree_pid"
status=$?

# timeout exits with 124 when it had to stop the command.
if (( status == 124 )); then
  echo "iqtree2 was stopped by the 167h cap; saving files so the run can be resumed" >&2
elif (( status != 0 )); then
  echo "iqtree2 exited with status ${status}" >&2
fi

# After completion, copy results back
save_outputs || exit 1
exit "$status"