Genome-resolved metagenomics revealed novel microbial taxa and distinct metabolism from macroscopic microbial mat structures inhabiting anoxic deep reefs of a Maldivian Blue Hole
Lapo Doni 1,2, Annalisa Azzola 1,2, Caterina Oliveri 1, Emanuele Bosi 1,2, Manon Auguste 1,2, Carla Morri 1,3, Carlo Nike Bianchi 1,3, Monica Montefalcone 1,2, Luigi Vezzulli 1,2*
1 Department of Earth, Environmental and Life Sciences (DiSTAV), University of Genoa, Corso Europa 26, 16132 Genoa, Italy. 2 National Biodiversity Future Center, Palermo, Italy. 3 Department of Integrative Marine Ecology (EMI), Stazione Zoologica Anton Dohrn - National Institute of Marine Biology, Ecology and Biotechnology, Genoa Marine Centre (GMC), Villa del Principe, Piazza del Principe 4, 16126 Genoa, Italy
MAGs with metaWRAP
# metaWRAP workflow: read QC -> assembly -> binning -> bin refinement -> bin quantification.
# All paths below are relative to a single working directory.
conda activate metawrap-env

# Read QC of the paired-end library; results are written to READ_QC/.
mkdir -p READ_QC CLEAN_READS
metawrap read_qc \
  -1 P_SPSEA-07-22-N1715_S6_L007_R1_001.fastq-005.gz \
  -2 P_SPSEA-07-22-N1715_S6_L007_R2_001.fastq-010.gz \
  -t 24 -o READ_QC/

# Move the QC'd read pair to the fixed names used by every later step.
# The original wrapped these two moves in a loop over one literal file name and
# computed an unused "prefix"; both were no-ops and have been removed.
# NOTE(review): assumes the *_val_[12].fq.gz files are inside READ_QC/ (the
# original moved them to ../CLEAN_READS, implying it was run from within that
# directory) - confirm against the actual run.
mv READ_QC/P_SPSEA-07-22-N1715_S6_L007_R1_001.fastq-005.gz_val_1.fq.gz CLEAN_READS/ALL_READS_1.fastq.gz
mv READ_QC/P_SPSEA-07-22-N1715_S6_L007_R2_001.fastq-010.gz_val_2.fq.gz CLEAN_READS/ALL_READS_2.fastq.gz

# Assemble the cleaned reads; the assembly is read later from ASSEMBLY/final_assembly.fasta.
metawrap assembly \
  -1 CLEAN_READS/ALL_READS_1.fastq.gz \
  -2 CLEAN_READS/ALL_READS_2.fastq.gz \
  -t 30 -o ASSEMBLY

# Bin the assembly with three binners (MetaBAT2, MaxBin2, CONCOCT).
# NOTE(review): confirm this metaWRAP version accepts gzip-compressed reads
# here and in quant_bins; its usage text describes *_1.fastq / *_2.fastq inputs.
metawrap binning -o INITIAL_BINNING -t 10 \
  -a ASSEMBLY/final_assembly.fasta \
  --metabat2 --maxbin2 --concoct \
  CLEAN_READS/*fastq.gz

# Consolidate the three bin sets using the thresholds -c 50 and -x 10;
# the refined bins are read below from BIN_REFINEMENT/metawrap_50_10_bins.
metawrap bin_refinement -o BIN_REFINEMENT -t 10 \
  -A INITIAL_BINNING/metabat2_bins/ \
  -B INITIAL_BINNING/maxbin2_bins/ \
  -C INITIAL_BINNING/concoct_bins/ \
  -c 50 -x 10

# Quantify the refined bins against the cleaned reads.
metawrap quant_bins \
  -b BIN_REFINEMENT/metawrap_50_10_bins \
  -o QUANT_BINS \
  -a ASSEMBLY/final_assembly.fasta \
  CLEAN_READS/*fastq.gz
MAGs taxonomy with GTDB-Tk
# GTDB-Tk classify workflow on the *.fa genomes in the current directory.
conda activate gtdbtk-2.1.0
mkdir gtdbtk
gtdbtk classify_wf \
  --genome_dir . \
  -x .fa \
  --out_dir gtdbtk \
  --scratch_dir gtdbtk/scratch_dir \
  --cpus 15
MAGs QC with CheckM
# CheckM lineage workflow: input is the *.fa bins in the current directory,
# output directory is ./checkm, and the summary table is written
# tab-separated to the file given with -f.
checkm lineage_wf \
  -t 10 \
  -x fa \
  -f ./checkm/checkm_lineage_wf_overview_qa.txt \
  --tab_table \
  . ./checkm
Dereplication with dRep
# Dereplicate every *.fna genome in the current directory with dRep;
# the work directory is ncbi_derep/ and --contamination is set to 10.
dRep dereplicate ncbi_derep/ \
  --contamination 10 \
  -g *.fna
ANI with pyani
# Pairwise ANI with pyani (method ANIb) on the genomes in the current directory;
# results go to ANIb_output.
# Redirection order matters: stdout is sent to the log first and stderr is then
# duplicated onto it, so both streams land in the file. The original
# "2>&1 > file" left stderr on the terminal and logged only stdout.
# NOTE(review): the log sits inside ANIb_output, so that directory must exist
# before the shell can open the log - confirm pyani accepts a pre-existing
# output directory, or move the log elsewhere.
average_nucleotide_identity.py -i . -o ANIb_output -m ANIb -g -v > ANIb_output/error.log 2>&1
AAI with compareM
# Pairwise AAI with CompareM on the *.fa genomes in the current directory; output in AAI/.
comparem aai_wf --file_ext fa . AAI

# Phylogenomics: GToTree on the genomes listed in fasta_files.txt with the
# "Bacteria" single-copy gene set (-H), using 30 parallel jobs (-j).
GToTree -H Bacteria -f fasta_files.txt -j 30

# Tree inference with IQ-TREE on the concatenated SCG alignment, with
# -B 1000 and -alrt 1000 branch-support replicates.
# NOTE(review): presumably Aligned_SCGs.faa is the alignment written by the
# GToTree step - confirm its location relative to the working directory.
iqtree -s Aligned_SCGs.faa -alrt 1000 -B 1000
Metabolic analysis of all MAGs with METABOLIC
# METABOLIC-C on the genomes in metawrap_bins, with read paths listed in reads.txt.
conda activate METABOLIC_v4.0
perl ~/METABOLIC_running_folder/METABOLIC/METABOLIC-C.pl \
  -t 30 \
  -in-gn metawrap_bins \
  -r reads.txt \
  -o METABOLIC-C_out \
  -m-cutoff 0.75 \
  -kofam-db full
LUCA-likeness with Melange
# Run the Snakemake workflow in the current directory with 30 cores,
# letting Snakemake manage per-rule conda environments.
conda activate snakemake
snakemake --cores 30 --use-conda
Output: Cog_PA.csv & Cog_description.csv
in R
library(tidyverse)

# Inputs ----
# cog_luca: one COG identifier per line.
# cog_pa / Cog_description: tables keyed by an `index` column.
# NOTE(review): assumes every column of cog_pa other than `index` is a
# numeric per-genome column - confirm against Cog_PA.csv.
cog_luca <- read_lines("cog_luca.txt")
cog_pa <- read_csv("Cog_PA.csv")
Cog_description <- read_csv("Cog_description.csv")

# LUCA COGs found in the bins ----
COG_LUCA_in_bins <- filter(cog_pa, index %in% cog_luca)
dim(COG_LUCA_in_bins)

COG_LUCA_with_description <- left_join(
  COG_LUCA_in_bins,
  Cog_description,
  by = "index"
)
dim(COG_LUCA_with_description)

# Per-genome column sums ----
# Same rows as filtering cog_pa again, so the filtered table is reused.
luca_cog_counts <- COG_LUCA_in_bins %>%
  select(-index) %>%
  summarise(across(everything(), sum))

total_cogs_per_genome <- cog_pa %>%
  select(-index) %>%
  summarise(across(everything(), sum))

# Share of each genome's COG total that is in the LUCA list, in percent.
luca_cog_percentages <- (luca_cog_counts / total_cogs_per_genome) * 100
print(luca_cog_percentages, digits = 3)
Detection of eukaryotes in the Mat with EukDetect
# Run the EukDetect Snakemake rules (target "runall") with 20 cores,
# configured by Pdefault_configfile.yml.
conda activate eukdetect
snakemake \
  --snakefile rules/eukdetect_eukfrac.rules \
  --configfile Pdefault_configfile.yml \
  --cores 20 \
  runall