4 Remove Contaminated Reads

4.1 BBDuk read decontamination

#!/bin/bash
#
# Screen every trimmed read pair against the PhiX reference with BBDuk.
#
# Input : resources/reads/trimmed/<sample>_1.fastq.gz and <sample>_2.fastq.gz
# Output: decontam/<sample>_{1,2}.fastq.gz              (out1/out2)
#         decontam/matchedphix/<sample>_{1,2}.fastq.gz  (outm1/outm2)
# Both output paths are relative to resources/reads/trimmed.
#
# Exit status: 1 if the input directory cannot be entered or if BBDuk failed
# for at least one sample; 0 otherwise.

echo PROGRESS: Read decontamination

# Stop here if the directory is missing; otherwise the loop below would run
# against whatever directory the script was started from.
cd resources/reads/trimmed || exit 1

# Create both output directories up front (matchedphix is nested in decontam).
mkdir -p decontam/matchedphix || exit 1

status=0
for fwd in *_1.fastq.gz; do
  # An unmatched glob is left as the literal pattern; skip it so BBDuk is
  # never started on a file that does not exist.
  [[ -e "${fwd}" ]] || continue

  # Sample name = forward-read file name without its _1.fastq.gz suffix.
  sample="${fwd%_1.fastq.gz}"

  # ${HOME} is used instead of a bare ~ so the reference path does not depend
  # on the shell performing tilde expansion inside a key=value argument.
  bbduk.sh -Xmx3g \
    in1="${sample}_1.fastq.gz" \
    in2="${sample}_2.fastq.gz" \
    out1="decontam/${sample}_1.fastq.gz" \
    out2="decontam/${sample}_2.fastq.gz" \
    outm1="decontam/matchedphix/${sample}_1.fastq.gz" \
    outm2="decontam/matchedphix/${sample}_2.fastq.gz" \
    ref="${HOME}/bbmap/resources/phix174_ill.ref.fa.gz" \
    k=25 \
    hdist=1 \
    overwrite=True || status=1  # remember the failure, keep processing
done

# Report a failure from any sample, not only from the last one.
exit "${status}"

4.2 FastQC - MultiQC on decontaminated reads


#!/bin/bash
#
# Run FastQC on every decontaminated FASTQ file and collect the reports
# in results/qc/fastqc3.

printf '%s\n' "PROGRESS: FastQC - Getting read quality scores of decontaminated reads."

reads_dir="resources/reads/trimmed/decontam"
report_dir="results/qc/fastqc3"

# The report directory must exist before FastQC is pointed at it with -o.
mkdir -p "${report_dir}"

fastqc "${reads_dir}"/*.fastq.gz \
  -o "${report_dir}"
## Warning in readLines(script_path): incomplete final line found on
## 'workflow/scripts/multiqc_decontam.sh'
#!/bin/bash
#
# Summarise the FastQC reports of the decontaminated reads with MultiQC,
# reading from results/qc/fastqc3 and writing to results/qc/multiqc3.

printf '%s\n' "PROGRESS: MultiQC - Getting summary of decontaminated read quality scores."

fastqc_reports="results/qc/fastqc3"
summary_dir="results/qc/multiqc3"

# Create the summary directory before MultiQC is asked to write into it.
mkdir -p "${summary_dir}"

# NOTE(review): --data-dir looks like a boolean MultiQC flag, which would make
# the FastQC directory a positional argument rather than the flag's value —
# confirm against the MultiQC CLI reference before reordering these arguments.
multiqc --force \
  --data-dir "${fastqc_reports}" \
  -o "${summary_dir}" \
  --export

4.3 Seqkit on decontaminated reads

#!/bin/bash
#
# Write SeqKit summary statistics for every decontaminated FASTQ file to
# results/qc/seqkit3/seqkit_stats.txt.

printf '%s\n' "PROGRESS: Getting stats of the decontaminated reads."

reads_dir="resources/reads/trimmed/decontam"
stats_dir="results/qc/seqkit3"
stats_file="${stats_dir}/seqkit_stats.txt"

# The redirection below needs the target directory to exist already.
mkdir -p "${stats_dir}"

seqkit stat "${reads_dir}"/*.fastq.gz > "${stats_file}"