#!/bin/bash
#SBATCH --account=nn9447k
#SBATCH --partition=accel
#SBATCH --gres=gpu:2
#SBATCH --nodes=1
#SBATCH --ntasks=2
#SBATCH --time=24:00:00
#SBATCH --mem-per-cpu=10G

# Train a fairseq transformer language model (fairseq-train, task
# language_modeling).
#
# Usage: sbatch <this-script> ARG1 ARG2 ARG3
#   ARG1, ARG2  joined as "ARG1-ARG2" to name the working directory,
#               relative to the directory the job runs in
#   ARG3        selects the run inside that directory:
#                 data         ./ARG1-ARG2/ARG3-bin
#                 checkpoints  ./ARG1-ARG2/ckpt_transformer_ARG3
#                 tensorboard  ./ARG1-ARG2/stats/ARG3
#
# Exits with status 2 (before loading any modules) when fewer than three
# non-empty arguments are given.

set -o errexit

# Fail fast on a bad submission instead of letting fairseq-train run against
# a nonsensical path such as "./-/-bin".
if (( $# < 3 )) || [[ -z "$1" || -z "$2" || -z "$3" ]]; then
  printf 'Usage: %s ARG1 ARG2 ARG3\n' "${0##*/}" >&2
  exit 2
fi

# Capture the positional parameters before sourcing anything, so a sourced
# file that touches "$@" cannot change which paths are used below.
work_dir="./${1}-${2}"
run_name="${3}"

source ~/.bashrc
module purge
module use -a /cluster/shared/nlpl/software/modules/etc
module load nlpl-fairseq/0.9.0/3.7
conda activate nlp

# Build the command line as an array so every path stays a single argument
# even if it contains whitespace or glob characters.
train_args=(
  --task language_modeling
  "${work_dir}/${run_name}-bin"
  --save-dir "${work_dir}/ckpt_transformer_${run_name}"
  --tensorboard-logdir "${work_dir}/stats/${run_name}"

  # Model
  --arch transformer_lm --share-decoder-input-output-embed
  --dropout 0.1

  # Optimizer
  --optimizer adam --adam-betas '(0.9, 0.98)' --weight-decay 0.01 --clip-norm 0.0

  # Learning-rate schedule
  --lr 0.0005 --lr-scheduler inverse_sqrt --warmup-updates 4000 --warmup-init-lr 1e-07

  # Batching
  --tokens-per-sample 512 --sample-break-mode none
  --max-tokens 2048 --update-freq 16
)

# PYTHONUNBUFFERED makes Python write its output unbuffered, so progress
# shows up in the job log as it is produced.
PYTHONUNBUFFERED=x fairseq-train "${train_args[@]}"
