5 View Processed Read Status

source("workflow/scripts/common.R")

library(tidyverse, suppressPackageStartupMessages())
library(ggtext)

stats1 <- read_table("results/qc/seqkit1/seqkit_stats.txt", show_col_types = F) %>% 
  mutate(file = str_replace_all(file, ".*/", "")) %>% 
  mutate(file = str_replace_all(file, ".fastq.gz", "")) %>% 
  select(file, rawreads = num_seqs)

stats2 <- read_table("results/qc/seqkit2/seqkit_stats.txt", show_col_types = F) %>% 
  mutate(file = str_replace_all(file, ".*/", "")) %>% 
  mutate(file = str_replace_all(file, ".fastq.gz", "")) %>% 
  select(file, trimmed = num_seqs)

stats3 <- read_table("results/qc/seqkit3/seqkit_stats.txt", show_col_types = F) %>% 
  mutate(file = str_replace_all(file, ".*/", "")) %>% 
  mutate(file = str_replace_all(file, ".fastq.gz", "")) %>% 
  select(file, decontam = num_seqs) 

stats <- inner_join(stats1, stats2, by = "file") %>% 
  inner_join(., stats3, by = "file") %>%
  pivot_longer(-file, names_to = "variable", values_to = "count") %>%
  mutate(variable = factor(variable, 
         levels = c("rawreads", "trimmed", "decontam"))) 

stats %>% 
  ggplot(aes(x = fct_inorder(file), y = count/1E3, fill = variable)) +  
  theme_cdi() +
  geom_col(position = "dodge") +
  labs(x = "Sample", y = "Number of Reads (X1000)", fill = "Color Code") +
  scale_y_continuous(expand = c(0.02, 0.0))

ggsave(file="figures/samples_hist.png", width=5, height=5)
ggsave(file="figures/samples_hist.svg", width=5, height=5)


stats %>% 
  ggplot(aes(x = fct_inorder(variable), y = count/1E3, fill = variable)) +  
  theme_classic() +
  geom_col(position = "dodge") +
  labs(x = "Processed Read", y = "Number of Reads (X500)", fill = "Color Code") +
  scale_y_continuous(expand = c(0.02, 0.0))

ggsave(file="figures/qc_hist.png", width=5, height=5)
ggsave(file="figures/qc_hist.svg", width=5, height=5)