#!/bin/bash
set -euo pipefail

# Site-specific settings. Replace every <PLACEHOLDER> token below (including
# the angle brackets) with a real value before running the script. The values
# are double-quoted so that the file parses as valid bash and so that paths
# containing spaces are assigned intact.

# Path to the DRAGEN hashtable directory.
DRAGEN_HASH_TABLE="<REF_DIR>"
# Path to the output directory for the DRAGEN run.
OUTPUT="<OUT_DIR>"
# File prefix for the DRAGEN output files.
PREFIX="<OUT_PREFIX>"
# Population SNP VCF. It can be retrieved from catalogs of population variation
# such as the 1000 Genomes Project or other large cohort discovery efforts.
# Only high-frequency SNPs should be included. A suitable file can be retrieved
# from the GATK resource bundle: 1000G_phase1.snps.high_confidence.vcf.gz
CNV_POP_VCF="<POPULATION_VCF_PATH>"
# Path to the VC systematic noise BED file. In tumor-only variant calling, this
# filter is essential for removing systematic noise observed in normal samples.
# Prebuilt systematic noise files are available for download on the Illumina
# DRAGEN Bio-IT Platform support site page. Alternatively, a systematic noise
# file can be generated by running the somatic tumor-only (TO) pipeline on
# normal samples. We recommend using a systematic noise file based on normal
# samples that match the library prep of the tumor samples. A prebuilt
# systematic noise BED file can be downloaded from
# https://support.illumina.com/sequencing/sequencing_software/dragen-bio-it-platform/product_files.html
VC_SYSTEMATIC_NOISE_FILE="<VC_SYSTEMATIC_NOISE_BED_FILE_PATH>"
# Path to the Nirvana annotation database folder. The database is downloadable
# at
# https://support-docs.illumina.com/SW/DRAGEN_v310/Content/SW/DRAGEN/IAE_DownloadData.htm
NIRVANA_ANNOTATION_FOLDER="<NIRVANA_ANNOTATION_FOLDER_PATH>"
# Define the input source: fastq list, fastq, bam, or cram.
#
# Exactly one source is used per run. Choose it by setting INPUT_TYPE in the
# environment (fastq_list | fastq | bam | cram); it defaults to fastq_list,
# the source this example uses. Only the variables belonging to the selected
# source are expanded, so the variables of the other sources may stay unset
# without tripping `set -u`. A missing variable for the selected source aborts
# the script with a message naming it.
#
# All option groups are bash arrays rather than strings, so every argument
# (including paths containing spaces) reaches dragen as exactly one word.
INPUT_TYPE="${INPUT_TYPE:-fastq_list}"
case "$INPUT_TYPE" in
  fastq_list)
    INPUT_SOURCE=(
      --tumor-fastq-list "${TUMOR_FASTQ_LIST:?TUMOR_FASTQ_LIST must be set}"
      --tumor-fastq-list-sample-id "${TUMOR_FASTQ_LIST_SAMPLE_ID:?TUMOR_FASTQ_LIST_SAMPLE_ID must be set}"
    )
    ;;
  fastq)
    INPUT_SOURCE=(
      --tumor-fastq1 "${TUMOR_FASTQ1:?TUMOR_FASTQ1 must be set}"
      --tumor-fastq2 "${TUMOR_FASTQ2:?TUMOR_FASTQ2 must be set}"
      --RGSM-tumor "${RGSM_TUMOR:?RGSM_TUMOR must be set}"
      --RGID-tumor "${RGID_TUMOR:?RGID_TUMOR must be set}"
    )
    ;;
  bam)
    INPUT_SOURCE=(
      --tumor-bam-input "${TUMOR_BAM:?TUMOR_BAM must be set}"
    )
    ;;
  cram)
    INPUT_SOURCE=(
      --tumor-cram-input "${TUMOR_CRAM:?TUMOR_CRAM must be set}"
    )
    ;;
  *)
    printf 'error: unknown INPUT_TYPE "%s" (expected fastq_list, fastq, bam, or cram)\n' \
      "$INPUT_TYPE" >&2
    exit 2
    ;;
esac

# Reference hashtable plus the selected input source.
INPUT_OPTIONS=(
  --ref-dir "$DRAGEN_HASH_TABLE"
  "${INPUT_SOURCE[@]}"
)

OUTPUT_OPTIONS=(
  --output-directory "$OUTPUT"
  --output-file-prefix "$PREFIX"
)

# Map/align, sort, and duplicate marking.
MA_OPTIONS=(
  --enable-map-align true
  --enable-sort true
  --enable-duplicate-marking true
)

# Copy number variant calling.
CNV_OPTIONS=(
  --enable-cnv true
  --cnv-population-b-allele-vcf "$CNV_POP_VCF"
)

# Small variant calling and annotation.
# REFERENCE is not assigned in this script and must come from the environment;
# it is passed as the annotation assembly (presumably a genome build name such
# as GRCh37 or GRCh38 -- confirm against the DRAGEN documentation).
SNV_OPTIONS=(
  --enable-variant-caller true
  --vc-systematic-noise "$VC_SYSTEMATIC_NOISE_FILE"
  --vc-enable-germline-tagging true
  --enable-variant-annotation true
  --variant-annotation-data "$NIRVANA_ANNOTATION_FOLDER"
  --variant-annotation-assembly "${REFERENCE:?REFERENCE must be set}"
)

# Structural variant calling.
# SV_SYSTEMATIC_NOISE_BEDPE is not assigned in this script and must come from
# the environment.
SV_OPTIONS=(
  --enable-sv true
  --sv-systematic-noise "${SV_SYSTEMATIC_NOISE_BEDPE:?SV_SYSTEMATIC_NOISE_BEDPE must be set}"
)

SNV_SV_DEDUPLICATION_OPTIONS=(
  --enable-variant-deduplication true
)

# Construct final command line
CMD=(
  dragen
  "${INPUT_OPTIONS[@]}"
  "${OUTPUT_OPTIONS[@]}"
  "${MA_OPTIONS[@]}"
  "${CNV_OPTIONS[@]}"
  "${SNV_OPTIONS[@]}"
  "${SV_OPTIONS[@]}"
  "${SNV_SV_DEDUPLICATION_OPTIONS[@]}"
)

# Execute
# Log the command in shell-quoted form so the printed line can be pasted back
# into a shell unchanged, then run the array directly. No intermediate
# `bash -c` is used, so no argument is re-split or re-interpreted.
printf '%q ' "${CMD[@]}"
printf '\n'
"${CMD[@]}"