Analysis
- The phyloseq R package provides a robust framework for downstream
microbiome analysis and visualization, and the scripts used here were
adapted from
the
phyloseq tutorial.
- Results and visualizations are reported based on SILVA for
consistency; however, taxonomy assignments from the other databases may
be loaded to reproduce the same workflow using the scripts provided
below.
Run Docker
- Prepare
seqtab_nochim.RDS and
DADA2_taxonomy_matrix_SILVA.rds, which we generated from
the last session, in the /Desktop/MiSeq_SOP folder
- If you don’t have them, you can download them:
seqtab_nochim.RDS
and
DADA2_taxonomy_matrix_SILVA.rds
- Open a terminal and run the command to start the Microbiome Docker
container
$ docker run -it -v ~/Desktop/MiSeq_SOP:/home/hub1 bioinfohub/microbiome R
Load R packages
library(dada2)
library(phyloseq)
library(dplyr)
library(tidyr)
library(tibble)
library(htmlwidgets)
library(htmltools)
library(sankeyD3)
library(Biostrings)
library(ggplot2)
library(data.table)
Directory
settings
- You should see both
seqtab_nochim.RDS and
DADA2_taxonomy_matrix_SILVA.rds files
base_dir <- "/home/hub1"
dir(base_dir)
Import ASV table
if (!exists("seqtab_nochim")) {
seqtab_nochim <- readRDS(file.path(base_dir, "seqtab_nochim.rds"))
}
if (!exists("tax_mat")) {
tax_mat <- readRDS(file.path(base_dir, "DADA2_taxonomy_matrix_SILVA.rds"))
}
dim(tax_mat)
## [1] 196 7
Alpha
diversity over time
- Visualize within-sample diversity using Shannon and Simpson
indices across sampling Day, with points colored by Group
(Early/Late) and shaped by Sex (Male/Female).
Group highlighted
by color
group_cols <- c("E" = "#009988", "L" = "#EE7733") # teal and orange
rich_plot <- plot_richness(phylo_obj,
x = "Day", measures = c("Shannon", "Simpson"),
color = "Group", shape = "Sex"
) +
theme_bw() +
scale_color_manual(values = group_cols) +
geom_point(size = 3)
pdf(file.path(base_dir, "alpha_diversity_by_group.pdf"), width = 6, height = 6)
print(rich_plot)
dev.off()

Sex highlighted by
color
sex_cols <- c("M" = "#0077BB", "F" = "#BB5566") # blue and red
rich_plot <- plot_richness(phylo_obj,
x = "Day", measures = c("Shannon", "Simpson"),
shape = "Group", color = "Sex"
) +
theme_bw() +
scale_color_manual(values = sex_cols) +
geom_point(size = 3)
pdf(file.path(base_dir, "alpha_diversity_by_sex.pdf"), width = 6, height = 6)
print(rich_plot)
dev.off()

Beta diversity
- Assess between-sample community differences using
Bray–Curtis distances on relative-abundance data and
visualize them with NMDS, with points colored by Group (Early/Late) and
shaped by Sex (Male/Female)
ps_nz <- prune_samples(sample_sums(phylo_obj) > 0, phylo_obj)
otu <- as(otu_table(ps_nz), "matrix")
if (taxa_are_rows(ps_nz)) otu <- t(otu)
otu[!is.finite(otu)] <- 0
otu_table(ps_nz) <- otu_table(otu, taxa_are_rows = FALSE)
ps_prop <- transform_sample_counts(ps_nz, function(x) x / sum(x))
nmds_bray <- ordinate(ps_prop, method = "NMDS", distance = "bray", trace = 0)
ordi_plot <- plot_ordination(ps_prop, nmds_bray, color = "Group", shape = "Sex") +
theme_bw() +
scale_color_manual(values = group_cols)
pdf(file.path(base_dir, "beta_diversity.pdf"), width = 6, height = 6)
print(ordi_plot)
dev.off()

Taxonomic relative
abundance
- Plot the top 20 most abundant ASVs as relative
abundance bar charts, aggregated and colored by Taxa, and faceted by
Group (Early/Late) and Sex (Male/Female) across sampling days.
top <- names(sort(taxa_sums(phylo_obj), decreasing = TRUE))[1:20]
ps_top <- transform_sample_counts(phylo_obj, function(OTU) OTU / sum(OTU))
ps_top <- prune_taxa(top, ps_top)
df_plot <- psmelt(ps_top)
df_plot$Family <- as.character(df_plot$Family)
df_plot$Family[is.na(df_plot$Family) | df_plot$Family == ""] <- "Unassigned"
dt <- as.data.table(df_plot)
dt_sum <- dt[, .(Abundance = mean(Abundance, na.rm = TRUE)), by = .(Category, Family)]
dt_sum[, Abundance := Abundance / sum(Abundance), by = Category]
top20_taxa <- ggplot(dt_sum, aes(x = Category, y = Abundance, fill = Family)) +
geom_col(color = NA) +
scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
scale_fill_brewer(palette = "Set2", drop = FALSE) +
theme_bw() +
theme(
legend.position = "bottom",
axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)
) +
coord_flip()
pdf(file.path(base_dir, "top20_taxa_by_group.pdf"), width = 8, height = 5)
print(top20_taxa)
dev.off()

Taxonomic lineage
profiles by category
- Taxonomic composition is summarized as lineage-level flows, i.e.,
Sankey chart, from higher to lower ranks for each
category, enabling rapid comparison of dominant clades and where
group-specific differences emerge along the taxonomic hierarchy.
source("/home/rstudio/R/generate_sankey_plot.R")
meta_df <- data.frame(phyloseq::sample_data(phylo_obj), stringsAsFactors = FALSE)
meta_df$Sample <- rownames(meta_df)
tt_cols <- colnames(phyloseq::tax_table(phylo_obj))
# E_M (Early + Male), E_F (Early + Female), L_M (Late + Male), L_F (Late + Female)
cat_value <- "E_M"
dat <- build_sankey_data(ps = phylo_obj, meta_df = meta_df, category_value = cat_value)
title_text <- sprintf("%s(#Samples=%d, #Taxa=%d, #Reads=%d)", cat_value, dat$nsamples, dat$ntaxa, dat$total_reads)
w <- render_one_sankey(dat, title_text)
htmlwidgets::saveWidget(w, file = file.path(base_dir, paste0("sankey_", cat_value, ".html")))
