diff --git a/.gitignore b/.gitignore
index 112189a0a6a312add8be1f3a52ad5d3f87026b5f..511f4a000b6c75e9bfe801c79a95887597bbe221 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,10 @@
 figures/output/**
 figures/data/nanostats_summary.tsv
 figures/data/crispr_summary.tsv
+figures/data/plasflow_summary.tsv
+figures/data/rgi_summary.tsv
 **/.snakemake/
+**/__pycache__/
 slurm-*.out
 *.tmp
 *.log
\ No newline at end of file
diff --git a/2019_GDB/scripts/get_mappability.sh b/2019_GDB/scripts/get_mappability.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6435129f0f48ed64d388c6ee62909d2baf557648
--- /dev/null
+++ b/2019_GDB/scripts/get_mappability.sh
@@ -0,0 +1,30 @@
+#!/bin/bash -l
+# Usage: sbatch -t 1:00:00 -N 1 -n 4 get_mappability.sh
+
+date
+cd /scratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/results/mapping
+conda activate anvio6       # or any environment that as "samtools"
+
+# metaG
+samtools flagstat sr/bwa_mem/ONT3_MG_xx_Rashi_S11_reads-x-ONT3_MG_xx_Rashi_S11-megahit_contigs.bam > sr_mappability.txt
+samtools flagstat lr/merged/no_barcode/no_barcode_reads-x-no_barcode-flye_contigs.bam > lr_mappability.txt
+
+samtools flagstat bwa_merged_metaspades_hybrid/bwa_merged_metaspades_hybrid.bam > hybrid_bwa_merged.txt 
+samtools flagstat bwa_sr_metaspades_hybrid/bwa_sr_metaspades_hybrid.bam > hybrid_bwa_sr.txt
+samtools flagstat bwa_lr_metaspades_hybrid/bwa_lr_metaspades_hybrid.bam > hybrid_bwa_lr.txt
+
+samtools flagstat mmi_merged_metaspades_hybrid/mmi_merged_metaspades_hybrid.bam > hybrid_mmi_merged.txt
+samtools flagstat mmi_sr_metaspades_hybrid/mmi_sr_metaspades_hybrid.bam > hybrid_mmi_sr.txt
+samtools flagstat mmi_lr_metaspades_hybrid/mmi_lr_metaspades_hybrid.bam > hybrid_mmi_lr.txt
+
+# metaT
+samtools flagstat ./metaT/sr/FastSelectHalf1_MT_Rashi_S12_reads-x-lr_no_barcode_sr_ONT3_MG_xx_Rashi_S11-metaspades_hybrid_contigs.bam > hybrid_metaT_sr.txt
+samtools flagstat ./metaT/sr/FastSelectHalf1_MT_Rashi_S12_reads-x-ONT3_MG_xx_Rashi_S11-megahit_contigs.bam > megahit_metaT_sr.txt
+ 
+# collating all the results in one place
+for file in *.txt; do echo $file; egrep 'total | mapped' $file | head -n 2; done | paste - - - | \
+   sed $'1 i\\\nsample\ttotal\tmapped\tpercent_mapped' | sed 's/ + 0 in total (QC-passed reads + QC-failed reads)//g' | \
+     sed s'/ + 0 mapped//g' | sed 's/ : N\/A)//g' | sed s'/(//g' | sed s'/.txt//g' | \
+       awk '{print$0=$1"\t"$2"\t"$3"\t"$4}' > mappability_index.tsv
+       
+date
\ No newline at end of file
diff --git a/config/CONFIG.yaml b/config/CONFIG.yaml
index aab8911bf4efe9daef1f357fbf111082335ffbd0..94b70d8b5571e73a4f9748709f2aa01bd9fce1f3 100755
--- a/config/CONFIG.yaml
+++ b/config/CONFIG.yaml
@@ -1,5 +1,5 @@
-# steps: "assembly_annotation mapping metaT mmseq binning taxonomy analysis"
-steps: "binning taxonomy analysis"
+# steps: "assembly_annotation mapping metaT mmseq binning taxonomy"
+steps: "mapping metaT mmseq binning taxonomy"
 data_dir: "data"
 results_dir: "results"
 db_dir: "dbs"
diff --git a/figures/README.md b/figures/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2bd5bb5c9465b3e1e0294ca2631707738d0fa69c
--- /dev/null
+++ b/figures/README.md
@@ -0,0 +1,24 @@
+# About
+
+This directory contains code to create figures for the manuscript.
+
+- `data/`: data for the plots
+  - it is not required, the paths to the files can be set in `figures.yml`
+- `envs/`: `conda` YAML files
+  - `envs/R.yml`: R-packages required for the figures
+  - `envs/figures.yml`: requirements to run the `Snakemake` pipeline
+- `src/`: scripts required to generate the figures
+- `src_SBB/`: scripts used by S. B. B. to generate figures
+  - for backup purposes
+- `figures.smk`: `Snakemake` file to generate figures
+- `figures.yml`: config file for `figures.smk`
+- `README.md`: this file
+
+# Generating figures
+
+```bash
+conda create -f envs/figures.yml
+conda activate ont_figures
+# rm -n to execute the steps
+snakemake -s figures.smk --cores 1 --use-conda -r -p -n
+```
\ No newline at end of file
diff --git a/figures/data/.gitkeep b/figures/data/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/figures/data/2019_GDB_protein_partial_genes.txt b/figures/data/2019_GDB_protein_partial_genes.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dbef47e79649f61fc7e02eeebda15c4fd05f1c4a
--- /dev/null
+++ b/figures/data/2019_GDB_protein_partial_genes.txt
@@ -0,0 +1,5 @@
+tool	metaspades	metaspades_hybrid
+prodigal_partial	251907	169318
+prodigal_total	365813	341322
+cdhit_unique	27526	63911
+cdhit_total	365813	341322
diff --git a/figures/data/bwa_lr_metaspades_hybrid_HQ_bins.txt b/figures/data/bwa_lr_metaspades_hybrid_HQ_bins.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7fbf7e9eab71d9d3df5689304370374bb96a207f
--- /dev/null
+++ b/figures/data/bwa_lr_metaspades_hybrid_HQ_bins.txt
@@ -0,0 +1,11 @@
+maxbin_output.024	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Eubacterium_R;s__Eubacterium_R	97.32	2.48	0.00
+maxbin_output.021	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D	98.63	0.00	0.00
+maxbin_output.015	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor	98.07	3.51	8.33
+maxbin_output.020	d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium;s__Phascolarctobacterium	99.98	2.89	0.00
+maxbin_output.001	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides	99.18	0.50	0.00
+maxbin_output.022	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Barnesiellaceae;g__Barnesiella;s__Barnesiella	95.85	0.38	0.00
+maxbin_output.027	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Butyricimonas;s__Butyricimonas	94.98	2.31	33.33
+maxbin_output.031	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Odoribacter;s__Odoribacter	88.51	2.69	0.00
+maxbin_output.007	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;s__Alistipes	100.00	5.74	0.00
+maxbin_output.012	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Tidjanibacter;s__Tidjanibacter	99.52	1.44	0.00
+maxbin_output.023	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__;s__	95.97	0.67	0.00
diff --git a/figures/data/bwa_merged_metaspades_hybrid_HQ_bins.txt b/figures/data/bwa_merged_metaspades_hybrid_HQ_bins.txt
new file mode 100644
index 0000000000000000000000000000000000000000..26f9e6ada62f07a566238928ed8d8f7e85ee14ce
--- /dev/null
+++ b/figures/data/bwa_merged_metaspades_hybrid_HQ_bins.txt
@@ -0,0 +1,27 @@
+bwa_merged_metaspades_hybrid.metabat.58	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Eubacterium_R;s__Eubacterium_R	97.32	0.00	0.00
+maxbin_output.020	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D	98.63	0.48	0.00
+bwa_merged_metaspades_hybrid.metabat.26	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium	94.29	0.00	0.00
+bwa_merged_metaspades_hybrid.metabat.6	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Oscillibacter;s__Oscillibacter	87.58	1.85	75.00
+bwa_merged_metaspades_hybrid.metabat.25	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-492;s__CAG-492	71.52	5.01	7.69
+bwa_merged_metaspades_hybrid.metabat.22	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor	97.58	1.45	0.00
+bwa_merged_metaspades_hybrid.metabat.5_sub	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Faecalicatena;s__Faecalicatena	89.13	1.29	75.00
+bwa_merged_metaspades_hybrid.metabat.17	d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Succiniclasticum;s__Succiniclasticum	78.13	0.00	0.00
+maxbin_output.024	d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium;s__Phascolarctobacterium	99.98	2.89	0.00
+bwa_merged_metaspades_hybrid.metabat.54	d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__CAG-196;s__CAG-196	77.34	1.66	0.00
+bwa_merged_metaspades_hybrid.metabat.34	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA6398;s__UBA6398	88.41	1.28	33.33
+maxbin_output.002	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides	67.71	0.00	0.00
+bwa_merged_metaspades_hybrid.metabat.43	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides_A;s__Bacteroides_A	86.91	1.67	40.00
+bwa_merged_metaspades_hybrid.metabat.32_sub	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides_B;s__Bacteroides_B	92.95	1.67	50.00
+bwa_merged_metaspades_hybrid.metabat.80	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides	78.08	0.13	0.00
+bwa_merged_metaspades_hybrid.metabat.55	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Barnesiellaceae;g__Barnesiella;s__Barnesiella	95.85	0.00	0.00
+bwa_merged_metaspades_hybrid.metabat.2	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Coprobacteraceae;g__Coprobacter;s__Coprobacter	91.32	0.57	0.00
+bwa_merged_metaspades_hybrid.metabat.45	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Butyricimonas;s__Butyricimonas	97.49	1.41	40.00
+maxbin_output.038_sub	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Odoribacter;s__Odoribacter	87.97	3.49	0.00
+bwa_merged_metaspades_hybrid.metabat.42	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;s__Alistipes	83.17	0.00	0.00
+maxbin_output.029	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;s__Alistipes	87.67	5.50	27.27
+maxbin_output.023	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;s__Alistipes	98.53	5.53	66.67
+maxbin_output.014	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Tidjanibacter;s__Tidjanibacter	99.52	1.44	0.00
+bwa_merged_metaspades_hybrid.metabat.73	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes_A;s__Alistipes_A	80.91	1.12	0.00
+bwa_merged_metaspades_hybrid.metabat.57	d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__Parasutterella;s__Parasutterella	81.43	1.25	40.00
+maxbin_output.026	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__;s__	93.96	0.67	0.00
+bwa_merged_metaspades_hybrid.metabat.63	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-103;s__	79.47	0.75	66.67
diff --git a/figures/data/bwa_sr_metaspades_hybrid_HQ_bins.txt b/figures/data/bwa_sr_metaspades_hybrid_HQ_bins.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f66892dce99d58b96dc77da9b53e9fbf72ee30af
--- /dev/null
+++ b/figures/data/bwa_sr_metaspades_hybrid_HQ_bins.txt
@@ -0,0 +1,26 @@
+bwa_sr_metaspades_hybrid.metabat.37	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Eubacterium_R;s__Eubacterium_R	97.32	0.00	0.00
+maxbin_output.014	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D	98.63	0.00	0.00
+bwa_sr_metaspades_hybrid.metabat.14	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium	94.29	0.00	0.00
+bwa_sr_metaspades_hybrid.metabat.7	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Oscillibacter;s__Oscillibacter	87.58	1.85	75.00
+bwa_sr_metaspades_hybrid.metabat.27	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-492;s__CAG-492	71.52	6.06	6.25
+bwa_sr_metaspades_hybrid.metabat.24	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor	98.31	1.93	16.67
+bwa_sr_metaspades_hybrid.metabat.6	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Faecalicatena;s__Faecalicatena	94.39	1.29	75.00
+bwa_sr_metaspades_hybrid.metabat.32_sub	d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Succiniclasticum;s__Succiniclasticum	76.94	0.00	0.00
+maxbin_output.020	d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium;s__Phascolarctobacterium	99.98	2.89	0.00
+bwa_sr_metaspades_hybrid.metabat.33	d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__CAG-196;s__CAG-196	77.34	0.38	0.00
+bwa_sr_metaspades_hybrid.metabat.42	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA6398;s__UBA6398	88.41	1.28	33.33
+maxbin_output.001	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides	99.23	0.50	0.00
+maxbin_output.004	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides_B;s__Bacteroides_B	91.91	4.67	21.05
+maxbin_output.016	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides	94.94	1.63	0.00
+bwa_sr_metaspades_hybrid.metabat.28	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Barnesiellaceae;g__Barnesiella;s__Barnesiella	95.85	0.00	0.00
+bwa_sr_metaspades_hybrid.metabat.3	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Coprobacteraceae;g__Coprobacter;s__Coprobacter	91.32	0.38	0.00
+bwa_sr_metaspades_hybrid.metabat.46	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Butyricimonas;s__Butyricimonas	97.49	1.41	40.00
+maxbin_output.034	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Odoribacter;s__Odoribacter	91.92	7.36	0.00
+maxbin_output.005	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;s__Alistipes	99.52	0.48	0.00
+bwa_sr_metaspades_hybrid.metabat.50_sub	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;s__Alistipes	98.04	1.44	66.67
+maxbin_output.011	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Tidjanibacter;s__Tidjanibacter	99.52	1.92	0.00
+bwa_sr_metaspades_hybrid.metabat.71	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes_A;s__Alistipes_A	80.91	1.12	0.00
+bwa_sr_metaspades_hybrid.metabat.55_sub	d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__Parasutterella;s__Parasutterella	81.43	1.25	40.00
+maxbin_output.022	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__;s__	95.97	0.67	0.00
+bwa_sr_metaspades_hybrid.metabat.53	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__	90.71	5.50	25.00
+bwa_sr_metaspades_hybrid.metabat.59	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-103;s__	84.75	0.73	50.00
diff --git a/figures/data/flye_HQ_bins.txt b/figures/data/flye_HQ_bins.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e464429dfa01c75362de93134985b0b486166da1
--- /dev/null
+++ b/figures/data/flye_HQ_bins.txt
@@ -0,0 +1 @@
+maxbin_output.001	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides	67.45	2.58	55.56
diff --git a/figures/data/mappability_over_1000bp.txt b/figures/data/mappability_over_1000bp.txt
new file mode 100644
index 0000000000000000000000000000000000000000..75191afdea81bdada247d0c9616afe39a573f5f0
--- /dev/null
+++ b/figures/data/mappability_over_1000bp.txt
@@ -0,0 +1,11 @@
+sample	mapped_reads	total	percent_mapped
+flye_counts.txt 7144171 8351240 85.5462
+hybrid_bwa_lr_counts.txt 7347760 8299732 88.5301
+hybrid_bwa_merged_counts.txt 57368285 60114498 95.4317
+hybrid_bwa_sr_counts.txt 50020525 51814766 96.5372
+hybrid_metaT_sr_counts.txt 48979035 78868203 62.1024
+hybrid_mmi_lr_counts.txt 7044114 8402725 83.8313
+hybrid_mmi_merged_counts.txt 59404046 63790314 93.1239
+hybrid_mmi_sr_counts.txt 52359932 55387589 94.5337
+megahit_counts.txt 48800047 51826913 94.1597
+megahit_metaT_sr_counts.txt 45584230 78531498 58.0458
diff --git a/figures/data/mappability_over_1500bp.txt b/figures/data/mappability_over_1500bp.txt
new file mode 100644
index 0000000000000000000000000000000000000000..961cdebb9460a460615c1757a1b2899cadf5da11
--- /dev/null
+++ b/figures/data/mappability_over_1500bp.txt
@@ -0,0 +1,11 @@
+sample	mapped_reads	total	percent_mapped
+flye_counts.txt 7130256 8351240 85.3796
+hybrid_bwa_lr_counts.txt 7265218 8299732 87.5356
+hybrid_bwa_merged_counts.txt 56903252 60114498 94.6581
+hybrid_bwa_sr_counts.txt 49638034 51814766 95.799
+hybrid_metaT_sr_counts.txt 45743715 78868203 58.0002
+hybrid_mmi_lr_counts.txt 6953663 8402725 82.7549
+hybrid_mmi_merged_counts.txt 58859670 63790314 92.2705
+hybrid_mmi_sr_counts.txt 51906007 55387589 93.7141
+megahit_counts.txt 47762664 51826913 92.158
+megahit_metaT_sr_counts.txt 36898046 78531498 46.985
diff --git a/figures/data/mappability_over_2000bp.txt b/figures/data/mappability_over_2000bp.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a10b62f93f99823023718287629d5393e3a4a3ff
--- /dev/null
+++ b/figures/data/mappability_over_2000bp.txt
@@ -0,0 +1,11 @@
+sample	mapped_reads	total	percent_mapped
+flye_counts.txt 7121527 8351240 85.2751
+hybrid_bwa_lr_counts.txt 7200635 8299732 86.7574
+hybrid_bwa_merged_counts.txt 56512238 60114498 94.0077
+hybrid_bwa_sr_counts.txt 49311603 51814766 95.169
+hybrid_metaT_sr_counts.txt 42538190 78868203 53.9358
+hybrid_mmi_lr_counts.txt 6887067 8402725 81.9623
+hybrid_mmi_merged_counts.txt 58426554 63790314 91.5916
+hybrid_mmi_sr_counts.txt 51539487 55387589 93.0524
+megahit_counts.txt 46946179 51826913 90.5826
+megahit_metaT_sr_counts.txt 32506911 78531498 41.3935
diff --git a/figures/data/mappability_over_5000bp.txt b/figures/data/mappability_over_5000bp.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fa74f416825c4db4c6e4120f36c20b2d1b049bb9
--- /dev/null
+++ b/figures/data/mappability_over_5000bp.txt
@@ -0,0 +1,11 @@
+sample	mapped_reads	total	percent_mapped
+flye_counts.txt 7066935 8351240 84.6214
+hybrid_bwa_lr_counts.txt 6934846 8299732 83.5551
+hybrid_bwa_merged_counts.txt 54644383 60114498 90.9005
+hybrid_bwa_sr_counts.txt 47709537 51814766 92.0771
+hybrid_metaT_sr_counts.txt 29346178 78868203 37.2091
+hybrid_mmi_lr_counts.txt 6611655 8402725 78.6847
+hybrid_mmi_merged_counts.txt 56320978 63790314 88.2908
+hybrid_mmi_sr_counts.txt 49709323 55387589 89.7481
+megahit_counts.txt 43437290 51826913 83.8122
+megahit_metaT_sr_counts.txt 19650549 78531498 25.0225
diff --git a/figures/data/megahit_HQ_bins.txt b/figures/data/megahit_HQ_bins.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1f60ed6f510c51ae03f68d2960898b54b75f1c34
--- /dev/null
+++ b/figures/data/megahit_HQ_bins.txt
@@ -0,0 +1,13 @@
+megahit.metabat.12	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Eubacterium_R;s__Eubacterium_R	98.66	1.68	0.00
+megahit.metabat.38	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D	97.26	0.68	100.00
+megahit.metabat.31	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor	98.87	1.81	28.57
+megahit.metabat.18	d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium;s__Phascolarctobacterium	98.18	1.70	0.00
+maxbin_output.002	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides	83.95	2.11	20.00
+megahit.metabat.5	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides	91.15	1.09	75.00
+maxbin_output.027	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Barnesiellaceae;g__Barnesiella;s__Barnesiella	92.10	1.57	42.86
+megahit.metabat.36	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Coprobacteraceae;g__Coprobacter;s__Coprobacter	98.11	0.77	0.00
+megahit.metabat.13_sub	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Butyricimonas;s__Butyricimonas	80.54	2.96	14.29
+maxbin_output.009	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;s__Alistipes	96.10	4.90	52.94
+maxbin_output.023	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;s__Alistipes	92.77	6.96	42.86
+megahit.metabat.43	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Tidjanibacter;s__Tidjanibacter	99.52	0.24	100.00
+megahit.metabat.45	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__;s__	93.51	0.67	0.00
diff --git a/figures/data/mmi_lr_metaspades_hybrid_HQ_bins.txt b/figures/data/mmi_lr_metaspades_hybrid_HQ_bins.txt
new file mode 100644
index 0000000000000000000000000000000000000000..68dd7152f35ea2073ad072197a2c2dcfd9d58f26
--- /dev/null
+++ b/figures/data/mmi_lr_metaspades_hybrid_HQ_bins.txt
@@ -0,0 +1,12 @@
+maxbin_output.025	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Eubacterium_R;s__Eubacterium_R	97.32	1.81	0.00
+maxbin_output.022	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D	98.63	0.00	0.00
+maxbin_output.016	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor	97.58	4.35	18.18
+maxbin_output.021	d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium;s__Phascolarctobacterium	99.98	2.89	0.00
+maxbin_output.001	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides	99.23	0.50	0.00
+maxbin_output.023	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Barnesiellaceae;g__Barnesiella;s__Barnesiella	95.85	0.38	0.00
+maxbin_output.028	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Butyricimonas;s__Butyricimonas	94.98	2.84	28.57
+maxbin_output.033	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Odoribacter;s__Odoribacter	88.78	2.15	0.00
+maxbin_output.007	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;s__Alistipes	99.52	0.48	0.00
+maxbin_output.020	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;s__Alistipes	98.53	2.88	33.33
+maxbin_output.012	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Tidjanibacter;s__Tidjanibacter	99.52	1.44	0.00
+maxbin_output.024	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__;s__	95.97	0.67	0.00
diff --git a/figures/data/mmi_merged_metaspades_hybrid_HQ_bins.txt b/figures/data/mmi_merged_metaspades_hybrid_HQ_bins.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f7dff0584af12f8316176438fadb46ef6ccfcf9b
--- /dev/null
+++ b/figures/data/mmi_merged_metaspades_hybrid_HQ_bins.txt
@@ -0,0 +1,26 @@
+mmi_merged_metaspades_hybrid.metabat.59	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Eubacterium_R;s__Eubacterium_R	97.32	0.00	0.00
+maxbin_output.017	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D	98.63	0.00	0.00
+mmi_merged_metaspades_hybrid.metabat.69	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium	94.29	0.00	0.00
+mmi_merged_metaspades_hybrid.metabat.49	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Oscillibacter;s__Oscillibacter	87.58	1.85	75.00
+mmi_merged_metaspades_hybrid.metabat.20	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-492;s__CAG-492	70.94	5.24	8.33
+mmi_merged_metaspades_hybrid.metabat.73	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor	98.31	1.93	16.67
+mmi_merged_metaspades_hybrid.metabat.28	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Faecalicatena;s__Faecalicatena	90.88	2.07	37.50
+mmi_merged_metaspades_hybrid.metabat.47	d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Succiniclasticum;s__Succiniclasticum	78.13	0.00	0.00
+maxbin_output.021	d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium;s__Phascolarctobacterium	100.00	3.19	0.00
+mmi_merged_metaspades_hybrid.metabat.17	d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__CAG-196;s__CAG-196	76.49	0.38	0.00
+mmi_merged_metaspades_hybrid.metabat.65	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA6398;s__UBA6398	87.30	1.28	33.33
+mmi_merged_metaspades_hybrid.metabat.19	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides_A;s__Bacteroides_A	86.53	0.56	50.00
+mmi_merged_metaspades_hybrid.metabat.82	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides_B;s__Bacteroides_B	93.32	8.74	20.00
+mmi_merged_metaspades_hybrid.metabat.46	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Barnesiellaceae;g__Barnesiella;s__Barnesiella	95.85	0.00	0.00
+mmi_merged_metaspades_hybrid.metabat.5	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Coprobacteraceae;g__Coprobacter;s__Coprobacter	91.32	0.57	0.00
+maxbin_output.029	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Butyricimonas;s__Butyricimonas	92.29	0.69	66.67
+mmi_merged_metaspades_hybrid.metabat.72_sub	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Odoribacter;s__Odoribacter	88.78	3.23	0.00
+mmi_merged_metaspades_hybrid.metabat.40	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;s__Alistipes	83.17	0.00	0.00
+maxbin_output.025_sub	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;s__Alistipes	92.51	7.10	26.92
+mmi_merged_metaspades_hybrid.metabat.48_sub	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;s__Alistipes	98.04	1.44	66.67
+maxbin_output.012	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Tidjanibacter;s__Tidjanibacter	99.52	1.44	0.00
+mmi_merged_metaspades_hybrid.metabat.64	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes_A;s__Alistipes_A	80.91	1.12	0.00
+mmi_merged_metaspades_hybrid.metabat.23	d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__Parasutterella;s__Parasutterella	81.43	1.25	40.00
+maxbin_output.023	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__;s__	95.97	0.67	0.00
+maxbin_output.035_sub	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__	85.61	5.61	21.43
+mmi_merged_metaspades_hybrid.metabat.56	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-103;s__	85.42	1.40	33.33
diff --git a/figures/data/mmi_sr_metaspades_hybrid_HQ_bins.txt b/figures/data/mmi_sr_metaspades_hybrid_HQ_bins.txt
new file mode 100644
index 0000000000000000000000000000000000000000..996a53a4b5aa19f9f7639dc6891e62a56e5edcb3
--- /dev/null
+++ b/figures/data/mmi_sr_metaspades_hybrid_HQ_bins.txt
@@ -0,0 +1,27 @@
+mmi_sr_metaspades_hybrid.metabat.61	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Eubacterium_R;s__Eubacterium_R	97.32	0.00	0.00
+maxbin_output.016	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D	98.63	0.00	0.00
+mmi_sr_metaspades_hybrid.metabat.72	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium	94.29	0.00	0.00
+mmi_sr_metaspades_hybrid.metabat.50	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Oscillibacter;s__Oscillibacter	87.58	1.85	75.00
+mmi_sr_metaspades_hybrid.metabat.82	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-492;s__CAG-492	71.52	7.11	6.25
+mmi_sr_metaspades_hybrid.metabat.57	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor	98.31	1.45	0.00
+mmi_sr_metaspades_hybrid.metabat.10	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Faecalicatena;s__Faecalicatena	90.88	1.68	50.00
+mmi_sr_metaspades_hybrid.metabat.48_sub	d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Succiniclasticum;s__Succiniclasticum	77.53	0.00	0.00
+maxbin_output.022	d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium;s__Phascolarctobacterium	99.98	2.89	0.00
+mmi_sr_metaspades_hybrid.metabat.45	d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__CAG-196;s__CAG-196	77.34	0.81	0.00
+mmi_sr_metaspades_hybrid.metabat.67	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA6398;s__UBA6398	87.30	1.28	33.33
+maxbin_output.001	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides	99.23	0.50	0.00
+mmi_sr_metaspades_hybrid.metabat.41	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides_A;s__Bacteroides_A	86.53	0.56	50.00
+mmi_sr_metaspades_hybrid.metabat.80	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides_B;s__Bacteroides_B	93.32	9.11	21.62
+maxbin_output.019_sub	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides	92.63	4.46	22.22
+maxbin_output.025	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Barnesiellaceae;g__Barnesiella;s__Barnesiella	99.25	0.38	0.00
+mmi_sr_metaspades_hybrid.metabat.5	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Coprobacteraceae;g__Coprobacter;s__Coprobacter	91.32	0.57	0.00
+mmi_sr_metaspades_hybrid.metabat.29	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Butyricimonas;s__Butyricimonas	98.03	1.41	40.00
+mmi_sr_metaspades_hybrid.metabat.71	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Odoribacter;s__Odoribacter	90.39	2.69	0.00
+maxbin_output.006	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;s__Alistipes	99.52	0.48	0.00
+mmi_sr_metaspades_hybrid.metabat.49_sub	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;s__Alistipes	98.04	1.44	66.67
+mmi_sr_metaspades_hybrid.metabat.12	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Tidjanibacter;s__Tidjanibacter	83.65	0.00	0.00
+mmi_sr_metaspades_hybrid.metabat.1	d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes_A;s__Alistipes_A	80.91	1.12	0.00
+mmi_sr_metaspades_hybrid.metabat.26	d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__Parasutterella;s__Parasutterella	81.43	1.25	40.00
+mmi_sr_metaspades_hybrid.metabat.9	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__;s__	93.96	0.67	0.00
+mmi_sr_metaspades_hybrid.metabat.47	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__	89.62	4.36	25.00
+mmi_sr_metaspades_hybrid.metabat.56	d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-103;s__	85.51	0.75	66.67
diff --git a/figures/data/overlap_sizes.txt b/figures/data/overlap_sizes.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d86ff8f9080a3c3e3be2de21895a35830f0f585c
--- /dev/null
+++ b/figures/data/overlap_sizes.txt
@@ -0,0 +1,11 @@
+165287 flye
+250742 megahit
+365813 metaspades
+341322 metaspades_hybrid
+109382 flye_megahit
+112740 flye_metaspades_hybrid
+110600 flye_metaspades
+196581 megahit_metaspades_hybrid
+214396 megahit_metaspades
+326458 metaspades_metaspades_hybrid
+101535 flye_megahit_metaspades_hybrid_metaspades
diff --git a/figures/data/total.txt b/figures/data/total.txt
new file mode 100644
index 0000000000000000000000000000000000000000..db35f6a2481179906c3034dff9cbaaae259e0aad
--- /dev/null
+++ b/figures/data/total.txt
@@ -0,0 +1,4 @@
+flye	165287
+megahit	250742
+metaspades	365813
+metaspades_hybrid	341322
diff --git a/figures/envs/figures.yml b/figures/envs/figures.yml
new file mode 100644
index 0000000000000000000000000000000000000000..49f4d878a5b62a68c050f498ee79c2022c67c107
--- /dev/null
+++ b/figures/envs/figures.yml
@@ -0,0 +1,143 @@
+name: ONT_pilot_fig
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=conda_forge
+  - _openmp_mutex=4.5=1_llvm
+  - aioeasywebdav=2.4.0=py36_1000
+  - aiohttp=3.6.2=py36h516909a_0
+  - appdirs=1.4.3=py_1
+  - async-timeout=3.0.1=py_1000
+  - attrs=19.3.0=py_0
+  - bcrypt=3.1.7=py36h8c4c3a4_1
+  - boto3=1.13.18=pyh9f0ad1d_0
+  - botocore=1.16.18=pyh9f0ad1d_0
+  - brotlipy=0.7.0=py36h8c4c3a4_1000
+  - ca-certificates=2020.4.5.1=hecc5488_0
+  - cachetools=4.1.0=py_1
+  - cairo=1.16.0=h3fc0475_1004
+  - certifi=2020.4.5.1=py36h9f0ad1d_0
+  - cffi=1.14.0=py36hd463f26_0
+  - chardet=3.0.4=py36h9f0ad1d_1006
+  - configargparse=1.2.3=pyh9f0ad1d_0
+  - cryptography=2.9.2=py36h45558ae_0
+  - datrie=0.8.2=py36h8c4c3a4_0
+  - decorator=4.4.2=py_0
+  - docutils=0.15.2=py36_0
+  - dropbox=9.4.0=py_0
+  - expat=2.2.9=he1b5a44_2
+  - filechunkio=1.8=py_2
+  - fontconfig=2.13.1=h1056068_1002
+  - freetype=2.10.2=he06d7ca_0
+  - ftputil=3.4=py_0
+  - gettext=0.19.8.1=hc5be6a0_1002
+  - gitdb=4.0.5=py_0
+  - gitpython=3.1.2=py_0
+  - glib=2.64.3=h6f030ca_0
+  - google-api-core=1.17.0=py36h9f0ad1d_0
+  - google-auth=1.14.3=pyh9f0ad1d_0
+  - google-cloud-core=1.3.0=py_0
+  - google-cloud-storage=1.28.1=pyh9f0ad1d_0
+  - google-resumable-media=0.5.0=py_1
+  - googleapis-common-protos=1.51.0=py36h9f0ad1d_2
+  - graphite2=1.3.13=he1b5a44_1001
+  - graphviz=2.38.0=hf68f40c_1011
+  - harfbuzz=2.4.0=hee91db6_4
+  - icu=67.1=he1b5a44_0
+  - idna=2.9=py_1
+  - idna_ssl=1.1.0=py36_1000
+  - importlib-metadata=1.6.0=py36h9f0ad1d_0
+  - importlib_metadata=1.6.0=0
+  - jinja2=2.11.2=pyh9f0ad1d_0
+  - jmespath=0.10.0=pyh9f0ad1d_0
+  - jpeg=9c=h14c3975_1001
+  - jsonschema=3.2.0=py36h9f0ad1d_1
+  - ld_impl_linux-64=2.34=h53a641e_4
+  - libblas=3.8.0=16_openblas
+  - libcblas=3.8.0=16_openblas
+  - libffi=3.2.1=he1b5a44_1007
+  - libgcc-ng=9.2.0=h24d8f2e_2
+  - libgfortran-ng=7.5.0=hdf63c60_6
+  - libiconv=1.15=h516909a_1006
+  - liblapack=3.8.0=16_openblas
+  - libopenblas=0.3.9=h5ec1e0e_0
+  - libpng=1.6.37=hed695b0_1
+  - libprotobuf=3.12.1=h8b12597_0
+  - libstdcxx-ng=9.2.0=hdf63c60_2
+  - libtiff=4.1.0=hc7e4089_6
+  - libtool=2.4.6=h14c3975_1002
+  - libuuid=2.32.1=h14c3975_1000
+  - libwebp-base=1.1.0=h516909a_3
+  - libxcb=1.13=h14c3975_1002
+  - libxml2=2.9.10=h72b56ed_1
+  - llvm-openmp=10.0.0=hc9558a2_0
+  - lz4-c=1.9.2=he1b5a44_1
+  - markupsafe=1.1.1=py36h8c4c3a4_1
+  - multidict=4.7.5=py36h8c4c3a4_1
+  - ncurses=6.1=hf484d3e_1002
+  - networkx=2.4=py_1
+  - numpy=1.18.4=py36h7314795_0
+  - openssl=1.1.1g=h516909a_0
+  - pandas=1.0.3=py36h830a2c2_1
+  - pango=1.40.14=he7ab937_1005
+  - paramiko=2.7.1=py36_0
+  - pcre=8.44=he1b5a44_0
+  - pip=20.1.1=py_1
+  - pixman=0.38.0=h516909a_1003
+  - prettytable=0.7.2=py_3
+  - protobuf=3.12.1=py36h831f99a_0
+  - psutil=5.7.0=py36h8c4c3a4_1
+  - pthread-stubs=0.4=h14c3975_1001
+  - pyasn1=0.4.8=py_0
+  - pyasn1-modules=0.2.7=py_0
+  - pycparser=2.20=py_0
+  - pygraphviz=1.5=py36h8c4c3a4_1002
+  - pynacl=1.3.0=py36h516909a_1001
+  - pyopenssl=19.1.0=py_1
+  - pyrsistent=0.16.0=py36h8c4c3a4_0
+  - pysftp=0.2.9=py_1
+  - pysocks=1.7.1=py36h9f0ad1d_1
+  - python=3.6.10=h8356626_1011_cpython
+  - python-dateutil=2.8.1=py_0
+  - python-irodsclient=0.8.2=py_0
+  - python_abi=3.6=1_cp36m
+  - pytz=2020.1=pyh9f0ad1d_0
+  - pyyaml=5.3.1=py36h8c4c3a4_0
+  - ratelimiter=1.2.0=py36_1000
+  - readline=8.0=hf8c457e_0
+  - requests=2.23.0=pyh8c360ce_2
+  - rsa=4.0=py_0
+  - s3transfer=0.3.3=py36h9f0ad1d_1
+  - setuptools=46.4.0=py36h9f0ad1d_0
+  - six=1.15.0=pyh9f0ad1d_0
+  - smmap=3.0.4=pyh9f0ad1d_0
+  - snakemake=5.3.0=py36_1
+  - snakemake-minimal=5.3.0=py36_1
+  - sqlite=3.30.1=hcee41ef_0
+  - tk=8.6.10=hed695b0_0
+  - typing_extensions=3.7.4.2=py_0
+  - urllib3=1.25.9=py_0
+  - wheel=0.34.2=py_1
+  - wrapt=1.12.1=py36h8c4c3a4_1
+  - xmlrunner=1.7.7=py_0
+  - xorg-kbproto=1.0.7=h14c3975_1002
+  - xorg-libice=1.0.10=h516909a_0
+  - xorg-libsm=1.2.3=h84519dc_1000
+  - xorg-libx11=1.6.9=h516909a_0
+  - xorg-libxau=1.0.9=h14c3975_0
+  - xorg-libxdmcp=1.1.3=h516909a_0
+  - xorg-libxext=1.3.4=h516909a_0
+  - xorg-libxpm=3.5.13=h516909a_0
+  - xorg-libxrender=0.9.10=h516909a_1002
+  - xorg-libxt=1.1.5=h516909a_1003
+  - xorg-renderproto=0.11.1=h14c3975_1002
+  - xorg-xextproto=7.3.0=h14c3975_1002
+  - xorg-xproto=7.0.31=h14c3975_1007
+  - xz=5.2.5=h516909a_0
+  - yaml=0.2.4=h516909a_0
+  - yarl=1.3.0=py36h516909a_1000
+  - zipp=3.1.0=py_0
+  - zlib=1.2.11=h516909a_1006
+  - zstd=1.4.4=h6597ccf_3
diff --git a/figures/envs/r.yml b/figures/envs/r.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6c984b52da8329cb129f9a74ba44b815d3456cb9
--- /dev/null
+++ b/figures/envs/r.yml
@@ -0,0 +1,196 @@
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=conda_forge
+  - _openmp_mutex=4.5=1_llvm
+  - _r-mutex=1.0.1=anacondar_1
+  - binutils_impl_linux-64=2.34=h53a641e_4
+  - binutils_linux-64=2.34=hc952b39_20
+  - bwidget=1.9.14=0
+  - bzip2=1.0.8=h516909a_2
+  - ca-certificates=2020.4.5.1=hecc5488_0
+  - cairo=1.16.0=hcf35c78_1003
+  - certifi=2020.4.5.1=py38h32f6830_0
+  - curl=7.69.1=h33f0ec9_0
+  - fontconfig=2.13.1=h86ecdb6_1001
+  - freetype=2.10.2=he06d7ca_0
+  - fribidi=1.0.9=h516909a_0
+  - gcc_impl_linux-64=7.5.0=hd420e75_6
+  - gcc_linux-64=7.5.0=h09487f9_20
+  - gettext=0.19.8.1=hc5be6a0_1002
+  - gfortran_impl_linux-64=7.5.0=hdf63c60_6
+  - gfortran_linux-64=7.5.0=h09487f9_20
+  - glib=2.64.3=h6f030ca_0
+  - graphite2=1.3.13=he1b5a44_1001
+  - gsl=2.6=h294904e_0
+  - gxx_impl_linux-64=7.5.0=hdf63c60_6
+  - gxx_linux-64=7.5.0=h09487f9_20
+  - harfbuzz=2.4.0=h9f30f68_3
+  - icu=64.2=he1b5a44_1
+  - jpeg=9c=h14c3975_1001
+  - krb5=1.17.1=h2fd8d38_0
+  - ld_impl_linux-64=2.34=h53a641e_4
+  - libblas=3.8.0=16_openblas
+  - libcblas=3.8.0=16_openblas
+  - libcurl=7.69.1=hf7181ac_0
+  - libedit=3.1.20170329=hf8c457e_1001
+  - libffi=3.2.1=he1b5a44_1007
+  - libgcc-ng=9.2.0=h24d8f2e_2
+  - libgfortran-ng=7.5.0=hdf63c60_6
+  - libgomp=9.2.0=h24d8f2e_2
+  - libiconv=1.15=h516909a_1006
+  - liblapack=3.8.0=16_openblas
+  - libopenblas=0.3.9=h5ec1e0e_0
+  - libpng=1.6.37=hed695b0_1
+  - libssh2=1.9.0=hab1572f_2
+  - libstdcxx-ng=9.2.0=hdf63c60_2
+  - libtiff=4.1.0=hc7e4089_6
+  - libuuid=2.32.1=h14c3975_1000
+  - libwebp-base=1.1.0=h516909a_3
+  - libxcb=1.13=h14c3975_1002
+  - libxml2=2.9.10=hee79883_0
+  - llvm-openmp=10.0.0=hc9558a2_0
+  - lz4-c=1.9.2=he1b5a44_1
+  - make=4.3=h516909a_0
+  - ncurses=6.1=hf484d3e_1002
+  - openssl=1.1.1g=h516909a_0
+  - pango=1.42.4=h7062337_4
+  - pcre=8.44=he1b5a44_0
+  - pcre2=10.34=h2f06484_0
+  - pip=20.1.1=py_1
+  - pixman=0.38.0=h516909a_1003
+  - pthread-stubs=0.4=h14c3975_1001
+  - python=3.8.2=he5300dc_7_cpython
+  - python_abi=3.8=1_cp38
+  - r-argparse=2.0.1=r40h6115d3f_2
+  - r-askpass=1.1=r40hcdcec82_2
+  - r-assertthat=0.2.1=r40h6115d3f_2
+  - r-backports=1.1.7=r40hcdcec82_0
+  - r-base=4.0.0=hdca8982_3
+  - r-brew=1.0_6=r40h6115d3f_1003
+  - r-callr=3.4.3=r40h6115d3f_1
+  - r-cli=2.0.2=r40h6115d3f_1
+  - r-clipr=0.7.0=r40h6115d3f_1
+  - r-colorspace=1.4_1=r40hcdcec82_2
+  - r-commonmark=1.7=r40hcdcec82_1002
+  - r-covr=3.5.0=r40h0357c0b_1
+  - r-crayon=1.3.4=r40h6115d3f_1003
+  - r-crosstalk=1.1.0.1=r40h6115d3f_1
+  - r-curl=4.3=r40hcdcec82_1
+  - r-desc=1.2.0=r40h6115d3f_1003
+  - r-devtools=2.3.0=r40h6115d3f_1
+  - r-digest=0.6.25=r40h0357c0b_2
+  - r-dplyr=0.8.5=r40h0357c0b_1
+  - r-dt=0.13=r40h6115d3f_1
+  - r-ellipsis=0.3.1=r40hcdcec82_0
+  - r-evaluate=0.14=r40h6115d3f_2
+  - r-fansi=0.4.1=r40hcdcec82_1
+  - r-farver=2.0.3=r40h0357c0b_1
+  - r-findpython=1.0.5=r40h6115d3f_2
+  - r-fs=1.4.1=r40h0357c0b_1
+  - r-getopt=1.20.3=r40_2
+  - r-ggfortify=0.4.10=r40h6115d3f_1
+  - r-ggplot2=3.3.0=r40h6115d3f_1
+  - r-ggrepel=0.8.2=r40h0357c0b_1
+  - r-ggsci=2.9=r40h6115d3f_1003
+  - r-ggsignif=0.6.0=r40h6115d3f_1
+  - r-gh=1.1.0=r40h6115d3f_1
+  - r-git2r=0.27.1=r40h7253d3a_0
+  - r-glue=1.4.1=r40hcdcec82_0
+  - r-gridextra=2.3=r40h6115d3f_1003
+  - r-gtable=0.3.0=r40h6115d3f_3
+  - r-highr=0.8=r40h6115d3f_2
+  - r-htmltools=0.4.0=r40h0357c0b_1
+  - r-htmlwidgets=1.5.1=r40h6115d3f_1
+  - r-httr=1.4.1=r40h6115d3f_2
+  - r-ini=0.3.1=r40h6115d3f_1003
+  - r-isoband=0.2.1=r40h0357c0b_1
+  - r-jsonlite=1.6.1=r40hcdcec82_1
+  - r-knitr=1.28=r40h6115d3f_1
+  - r-labeling=0.3=r40h6115d3f_1003
+  - r-later=1.0.0=r40h0357c0b_1
+  - r-lattice=0.20_41=r40hcdcec82_2
+  - r-lazyeval=0.2.2=r40hcdcec82_2
+  - r-lifecycle=0.2.0=r40h6115d3f_1
+  - r-magrittr=1.5=r40h6115d3f_1003
+  - r-markdown=1.1=r40hcdcec82_1
+  - r-mass=7.3_51.6=r40hcdcec82_2
+  - r-matrix=1.2_18=r40h7fa42b6_3
+  - r-memoise=1.1.0=r40h6115d3f_1004
+  - r-mgcv=1.8_31=r40h7fa42b6_1
+  - r-mime=0.9=r40hcdcec82_1
+  - r-munsell=0.5.0=r40h6115d3f_1003
+  - r-nlme=3.1_148=r40h9bbef5b_0
+  - r-openssl=1.4.1=r40he5c4762_1
+  - r-pheatmap=1.0.12=r40h6115d3f_2
+  - r-pillar=1.4.4=r40h6115d3f_0
+  - r-pkgbuild=1.0.8=r40h6115d3f_0
+  - r-pkgconfig=2.0.3=r40h6115d3f_1
+  - r-pkgload=1.0.2=r40h0357c0b_1002
+  - r-plogr=0.2.0=r40h6115d3f_1003
+  - r-plyr=1.8.6=r40h0357c0b_1
+  - r-praise=1.0.0=r40h6115d3f_1004
+  - r-prettyunits=1.1.1=r40h6115d3f_1
+  - r-processx=3.4.2=r40hcdcec82_1
+  - r-promises=1.1.0=r40h0357c0b_1
+  - r-ps=1.3.3=r40hcdcec82_0
+  - r-purrr=0.3.4=r40hcdcec82_1
+  - r-r6=2.4.1=r40h6115d3f_1
+  - r-rcmdcheck=1.3.3=r40h6115d3f_3
+  - r-rcolorbrewer=1.1_2=r40h6115d3f_1003
+  - r-rcpp=1.0.4.6=r40h0357c0b_1
+  - r-rematch2=2.1.2=r40h6115d3f_1
+  - r-remotes=2.1.1=r40h6115d3f_1
+  - r-reshape2=1.4.4=r40h0357c0b_1
+  - r-rex=1.2.0=r40h6115d3f_1
+  - r-rlang=0.4.6=r40hcdcec82_0
+  - r-roxygen2=7.1.0=r40h0357c0b_1
+  - r-rprojroot=1.3_2=r40h6115d3f_1003
+  - r-rstudioapi=0.11=r40h6115d3f_1
+  - r-rversions=2.0.2=r40h6115d3f_0
+  - r-scales=1.1.1=r40h6115d3f_0
+  - r-sessioninfo=1.1.1=r40h6115d3f_1002
+  - r-stringi=1.4.6=r40h0e574ca_2
+  - r-stringr=1.4.0=r40h6115d3f_2
+  - r-sys=3.3=r40hcdcec82_1
+  - r-testit=0.11=r40h6115d3f_1
+  - r-testthat=2.3.2=r40h0357c0b_1
+  - r-tibble=3.0.1=r40hcdcec82_1
+  - r-tidyr=1.1.0=r40h0357c0b_0
+  - r-tidyselect=1.1.0=r40h6115d3f_0
+  - r-upsetr=1.4.0=r40h6115d3f_2
+  - r-usethis=1.6.1=r40h6115d3f_1
+  - r-utf8=1.1.4=r40hcdcec82_1003
+  - r-vctrs=0.3.0=r40hcdcec82_0
+  - r-viridis=0.5.1=r40h6115d3f_1004
+  - r-viridislite=0.3.0=r40h6115d3f_1003
+  - r-whisker=0.4=r40h6115d3f_1
+  - r-withr=2.2.0=r40h6115d3f_1
+  - r-xfun=0.14=r40h6115d3f_0
+  - r-xml2=1.3.2=r40h0357c0b_1
+  - r-xopen=1.0.0=r40h6115d3f_1003
+  - r-yaml=2.2.1=r40hcdcec82_1
+  - r-zeallot=0.1.0=r40h6115d3f_1002
+  - readline=8.0=hf8c457e_0
+  - sed=4.7=h1bed415_1000
+  - setuptools=46.4.0=py38h32f6830_0
+  - sqlite=3.30.1=hcee41ef_0
+  - sysroot_linux-64=2.12=h3a0023d_3
+  - tk=8.6.10=hed695b0_0
+  - tktable=2.10=h555a92e_3
+  - wheel=0.34.2=py_1
+  - xorg-kbproto=1.0.7=h14c3975_1002
+  - xorg-libice=1.0.10=h516909a_0
+  - xorg-libsm=1.2.3=h84519dc_1000
+  - xorg-libx11=1.6.9=h516909a_0
+  - xorg-libxau=1.0.9=h14c3975_0
+  - xorg-libxdmcp=1.1.3=h516909a_0
+  - xorg-libxext=1.3.4=h516909a_0
+  - xorg-libxrender=0.9.10=h516909a_1002
+  - xorg-renderproto=0.11.1=h14c3975_1002
+  - xorg-xextproto=7.3.0=h14c3975_1002
+  - xorg-xproto=7.0.31=h14c3975_1007
+  - xz=5.2.5=h516909a_0
+  - zlib=1.2.11=h516909a_1006
+  - zstd=1.4.4=h6597ccf_3
diff --git a/figures/figures.smk b/figures/figures.smk
new file mode 100644
index 0000000000000000000000000000000000000000..9440dcd3839ab996308a56d793d582277ee60d62
--- /dev/null
+++ b/figures/figures.smk
@@ -0,0 +1,354 @@
+##################################################
+# Config
+##################################################
+configfile: "figures.yml"
+
+##################################################
+# Python modules
+##################################################
+import os
+
+##################################################
+# TARGETS
+##################################################
+FIG_MMSEQ_UPSETR    = os.path.join(config["output"], config["fig_mmseq_upsetr"]["output"])
+FIG_QUAST           = os.path.join(config["output"], config["fig_quast"]["output"])
+FIG_PARTIAL_GENES   = os.path.join(config["output"], config["fig_partial_genes"]["output"])
+FIG_NANOSTATS       = os.path.join(config["output"], config["fig_nanostats"]["output"])
+FIG_CRISPR          = os.path.join(config["output"], config["fig_crispr"]["output"])
+FIG_PLASFLOW        = os.path.join(config["output"], config["fig_plasflow"]["output"])
+FIG_RGI             = os.path.join(config["output"], config["fig_rgi"]["output"])
+
+##################################################
+# RULES
+##################################################
+rule all:
+    input:
+        FIG_MMSEQ_UPSETR,
+        FIG_QUAST,
+        FIG_PARTIAL_GENES,
+        FIG_NANOSTATS,
+        FIG_CRISPR,
+        FIG_PLASFLOW,
+        FIG_RGI,
+
+rule fig_mmseq_upsetr:
+    input:
+        overlap_sizes=config["fig_mmseq_upsetr"]["input"]["overlap_sizes"]
+    output:
+        pdf=FIG_MMSEQ_UPSETR
+    log:
+        FIG_MMSEQ_UPSETR + ".log"
+    params:
+        utils=config["utils"],
+        width=config["fig_mmseq_upsetr"]["width"],
+        height=config["fig_mmseq_upsetr"]["height"]
+    conda:
+        "envs/r.yml"
+    script:
+        config["fig_mmseq_upsetr"]["script"]
+
+rule fig_quast:
+    input:
+        stats=config["fig_quast"]["input"]["stats"]
+    output:
+        pdf=FIG_QUAST
+    log:
+        FIG_QUAST + ".log"
+    params:
+        utils=config["utils"],
+        width=config["fig_quast"]["width"],
+        height=config["fig_quast"]["height"]
+    conda:
+        "envs/r.yml"
+    script:
+        config["fig_quast"]["script"]
+
+rule fig_partial_genes:
+    input:
+        counts=config["fig_partial_genes"]["input"]["counts"]
+    output:
+        pdf=FIG_PARTIAL_GENES
+    log:
+        FIG_PARTIAL_GENES + ".log"
+    params:
+        utils=config["utils"],
+        width=config["fig_partial_genes"]["width"],
+        height=config["fig_partial_genes"]["height"]
+    conda:
+        "envs/r.yml"
+    script:
+        config["fig_partial_genes"]["script"]
+
+rule fig_nanostats_data:
+    input:
+        expand(
+            os.path.join(config["data_path"], "{meth_flag}/qc/lr/{sel_flag}/no_barcode/no_barcodeNanoStats.txt"),
+            meth_flag=["results", "non_methylation_aware_results"],
+            sel_flag=["S1_SizeSelected", "S3_Gtube", "merged"]
+        )
+    output:
+        config["fig_nanostats"]["input"]["stats"]
+    run:
+        import re
+        import pandas
+        summary = dict()
+        for ifile_path in input:
+            summary[ifile_path] = dict()
+            # info from file path
+            summary[ifile_path]["meth_flag"] = "M" if not re.search("non_methylation_aware", ifile_path) else "N"
+            sel_flag  = None
+            if re.search("S1_SizeSelected", ifile_path):
+                sel_flag = "size_selected"
+            elif re.search("S3_Gtube", ifile_path):
+                sel_flag = "gtube"
+            elif re.search("merged", ifile_path):
+                sel_flag = "merged"
+            assert sel_flag is not None
+            summary[ifile_path]["sel_flag"] = sel_flag
+            # file content
+            flag = None
+            with open(ifile_path, "r") as ifile:
+                for line in ifile:
+                    if re.match("General summary", line):
+                        flag = "general"
+                    elif re.match("Number, percentage and megabases of reads above quality cutoffs", line):
+                        flag = "number"
+                    elif re.match("Top 5 highest mean basecall quality scores and their read lengths", line):
+                        flag = "top5QS"
+                    elif re.match("Top 5 longest reads and their mean basecall quality score", line):
+                        flag = "top5LR"
+                    else:
+                        # general summary
+                        if flag == "general":
+                            line = re.sub(":\s+", "\t", line)
+                            k, v = line.split("\t")
+                            summary[ifile_path][k] = float(re.sub(",", "", v))
+                        # rest: skip
+                        else:
+                            continue
+        # dict to DataFrame
+        summary = pandas.DataFrame.from_dict(summary, orient="index")
+        # wrote to file
+        summary.to_csv(output[0], sep="\t", header=True, index=False, index_label=False)
+
+rule fig_nanostats:
+    input:
+        stats=config["fig_nanostats"]["input"]["stats"]
+    output:
+        pdf=FIG_NANOSTATS
+    log:
+        FIG_NANOSTATS + ".log"
+    params:
+        utils=config["utils"],
+        width=config["fig_nanostats"]["width"],
+        height=config["fig_nanostats"]["height"]
+    conda:
+        "envs/r.yml"
+    script:
+        config["fig_nanostats"]["script"]
+
+rule fig_crispr_data:
+    input:
+        expand(
+            os.path.join(config["data_path"], "results/analysis/crispr/minced/{asm_tool}.txt"),
+            asm_tool=config["assemblers"]
+        ),
+        expand(
+            os.path.join(config["data_path"], "results/analysis/crispr/casc/{asm_tool}_casc_output/{asm_tool}.results.txt"),
+            asm_tool=config["assemblers"]
+        )
+    output:
+        config["fig_crispr"]["input"]["stats"]
+    run:
+        import os
+        import re
+        import pandas
+        from src.utils import parse_minced_report, parse_casc_report
+        summary = []
+        for ifile_path in input:
+            ifile_df = None
+            # info from file path
+            crispr_tool = None
+            if re.search("minced", ifile_path):
+                crispr_tool = "minced"
+            elif re.search("casc", ifile_path):
+                crispr_tool = "casc"
+            asm_tool = os.path.basename(ifile_path).split(".")[0]
+            # file content
+            if crispr_tool == "minced":
+                ifile_df = pandas.DataFrame(parse_minced_report(ifile_path))
+            elif crispr_tool == "casc":
+                ifile_df = parse_casc_report(ifile_path)
+            ifile_df = ifile_df.assign(crispr_tool=crispr_tool)
+            ifile_df = ifile_df.assign(asm_tool=asm_tool)
+            summary.append(ifile_df)
+        # concat
+        summary = pandas.concat(objs=summary, axis="index")
+        # wrote to file
+        summary.to_csv(output[0], sep="\t", header=True, index=False, index_label=False)
+
+rule fig_crispr:
+    input:
+        stats=config["fig_crispr"]["input"]["stats"]
+    output:
+        pdf=FIG_CRISPR
+    log:
+        FIG_CRISPR + ".log"
+    params:
+        utils=config["utils"],
+        width=config["fig_crispr"]["width"],
+        height=config["fig_crispr"]["height"]
+    conda:
+        "envs/r.yml"
+    script:
+        config["fig_crispr"]["script"]
+
+rule fig_plasflow_data:
+    input:
+        expand(
+            os.path.join(config["data_path"], "results/analysis/plasflow/{asm_tool}.tsv"),
+            asm_tool=config["assemblers"]
+        )
+    output:
+        config["fig_plasflow"]["input"]["stats"]
+    run:
+        import pandas
+
+        def sum_up(df, colname, label):
+            df_sum = df.groupby(colname)['contig_length'].agg(['sum','count'])
+            df_sum["count_pct"] = 100 * df_sum["count"] / sum(df_sum["count"])
+            df_sum["sum_pct"]   = 100 * df_sum["sum"]   / sum(df_sum["sum"])
+            df_sum["type"]      = label
+            return df_sum
+
+        # read in and summarize
+        summary = []
+        for ifile in input:
+            idf = pandas.read_csv(ifile, sep="\t", header=0, usecols=["contig_length", "label"])
+            # extract class label, i.e. w/o taxon
+            idf = idf.assign(classlabel=idf.label.apply(lambda x: x.split(".")[0]))
+            # sum up
+            idf_labelsum = sum_up(idf, "label", "label")
+            idf_classsum = sum_up(idf, "classlabel", "class")
+            idf_sum = pandas.concat(objs=[idf_labelsum, idf_classsum], axis="index")
+            idf_sum["tool"] = os.path.splitext(os.path.basename(ifile))[0]
+            summary.append(idf_sum)
+        # concat and save
+        summary = pandas.concat(objs=summary, axis="index")
+        summary.to_csv(output[0], sep="\t", header=True, index=True, index_label="label")
+
+rule fig_plasflow:
+    input:
+        stats=config["fig_plasflow"]["input"]["stats"]
+    output:
+        pdf=FIG_PLASFLOW
+    log:
+        FIG_PLASFLOW + ".log"
+    params:
+        utils=config["utils"],
+        width=config["fig_plasflow"]["width"],
+        height=config["fig_plasflow"]["height"]
+    conda:
+        "envs/r.yml"
+    script:
+        config["fig_plasflow"]["script"]
+
+rule fig_rgi_data:
+    input:
+        expand(
+            os.path.join(config["data_path"], "results/analysis/rgi/{asm_tool}.txt"),
+            asm_tool=config["assemblers"]
+        )
+    output:
+        config["fig_rgi"]["input"]["stats"]
+    run:
+        import pandas
+        import numpy
+
+        summary_cols  = ["Best_Hit_ARO", "ARO", "Drug Class", "Resistance Mechanism", "AMR Gene Family"]
+        summary_types = ["all", "strict", "nudged"]
+
+        def count_hits(df, colname, tool_label=None):
+            """
+            Sum up RGI results: count hits by grouping them
+            """
+            df_sum = df.groupby(colname)[colname].agg(["count"])
+            df_sum["label"] = df_sum.index
+            if tool_label is not None:
+                df_sum.rename(columns={"count": tool_label}, inplace=True)
+            return df_sum
+        
+        def sumup_hits(df, summary_cols, tool_label):
+            """
+            Count hits grouping them by each given column: all/strict/nudged
+            """
+            assert set(df.Nudged) == {numpy.nan, True}, "Unexpected \"Nudged\" value in RGI report: {}".format(set(df.Nudged))
+            summary = dict.fromkeys(summary_cols)
+            for summary_col in summary_cols:
+                summary[summary_col] = {
+                    "all": count_hits(df, summary_col, tool_label),
+                    "strict": count_hits(df[pandas.isnull(df.Nudged)], summary_col, tool_label),
+                    "nudged": count_hits(df[pandas.notnull(df.Nudged)], summary_col, tool_label)
+                }
+            return summary
+        
+        def merge_summaries(dfs, summary_col, summary_type):
+            """
+            Merge summaries (list of DataFrame obj.s) for given column and type
+            """
+            summary = None
+            for df in dfs:
+                if summary is None:
+                        summary =  df[summary_col][summary_type].copy()
+                else:
+                    summary = summary.merge(
+                        right=df[summary_col][summary_type],
+                        on="label",
+                        how="outer",
+                        sort=False
+                    )
+            summary["col"]  = summary_col
+            summary["type"] = summary_type
+            return summary
+
+        # read in and summarize
+        summaries = []
+        for ifile in input:
+            # tool name from file name
+            ifile_tool = os.path.splitext(os.path.basename(ifile))[0]
+            # read in the report
+            summaries.append(
+                sumup_hits(
+                    pandas.read_csv(ifile, sep="\t", header=0, usecols=["ORF_ID"] +  summary_cols + ["Nudged"]),
+                    summary_cols,
+                    ifile_tool
+                )
+            )
+        # merge
+        merged_summaries = []
+        for summary_col in summary_cols:
+            for summary_type in summary_types:
+                merged_summaries.append(merge_summaries(summaries, summary_col, summary_type))
+        # concat
+        summary = pandas.concat(objs=merged_summaries, axis="index")
+        # reorder columns
+        summary_cols_ordered = ["label", "col", "type"] + [c for c in summary.columns if c not in ["label", "col", "type"]]
+        # save
+        summary[summary_cols_ordered].to_csv(output[0], sep="\t", header=True, index=False, index_label=False, na_rep=0)
+
+rule fig_rgi:
+    input:
+        stats=config["fig_rgi"]["input"]["stats"]
+    output:
+        pdf=FIG_RGI
+    log:
+        FIG_RGI + ".log"
+    params:
+        utils=config["utils"],
+        width=config["fig_rgi"]["width"],
+        height=config["fig_rgi"]["height"]
+    conda:
+        "envs/r.yml"
+    script:
+        config["fig_rgi"]["script"]
diff --git a/figures/figures.yml b/figures/figures.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ee79a0879067981e8e1859c4c33919ae6be607f3
--- /dev/null
+++ b/figures/figures.yml
@@ -0,0 +1,62 @@
+# general
+data_path: "/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/"
+output: "output" # output directory
+utils: "src/utils.R" # path to utils.R
+assemblers: ["flye", "megahit", "metaspades", "metaspades_hybrid"]
+
+# figures
+fig_mmseq_upsetr:
+    script: "src/fig_mmseq_upsetr.R"
+    input:
+        overlap_sizes: "data/overlap_sizes.txt"
+    output: "fig_mmseq_upsetr.pdf"
+    width: 7
+    height: 5
+
+fig_quast:
+    script: "src/fig_quast.R"
+    input:
+        stats: "/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/quast_methylation_v_non_comparison/quast_comparision_meth_v_non_results/report.tsv"
+    output: "fig_quast.pdf"
+    width: 7
+    height: 10
+
+fig_partial_genes:
+    script: "src/fig_partial_genes.R"
+    input:
+        counts: "data/2019_GDB_protein_partial_genes.txt"
+    output: "fig_partial_genes.pdf"
+    width: 7
+    height: 5
+
+fig_nanostats:
+    script: "src/fig_nanostats.R"
+    input:
+        stats: "data/nanostats_summary.tsv"
+    output: "fig_nanostats.pdf"
+    width: 7
+    height: 9
+
+fig_crispr:
+    script: "src/fig_crispr.R"
+    input:
+        stats: "data/crispr_summary.tsv"
+    output: "fig_crispr.pdf"
+    width: 7
+    height: 5
+
+fig_plasflow:
+    script: "src/fig_plasflow.R"
+    input:
+        stats: "data/plasflow_summary.tsv"
+    output: "fig_plasflow.pdf"
+    width: 7
+    height: 5
+
+fig_rgi:
+    script: "src/fig_rgi.R"
+    input:
+        stats: "data/rgi_summary.tsv"
+    output: "fig_rgi.pdf"
+    width: 10
+    height: 7
\ No newline at end of file
diff --git a/figures/src/PathoFact_AMR_Figures.R b/figures/src/PathoFact_AMR_Figures.R
new file mode 100644
index 0000000000000000000000000000000000000000..56df94a5a0eacc69ca952bf27956714276e061fa
--- /dev/null
+++ b/figures/src/PathoFact_AMR_Figures.R
@@ -0,0 +1,96 @@
+# To generate figures for PathoFact output
+library(tidyverse)
+library(ggplot2)
+#################
+# Preprocessing #
+#################
+ONT <- read.csv("~/Documents/R_data/nanopore_ONT/pathofact_AMR/PathoFact_ONT.csv", row.names=1)
+# Toxin prediction
+Toxin_confidence_level <- c("-","1","2")
+Toxin_prediction <- c("non-pathogenic","secreted","non-secreted")
+Toxin <- data.frame(Toxin_confidence_level,Toxin_prediction)
+Toxin_ONT <- ONT %>% group_by(Toxin_confidence_level, Sample) %>% summarise(RNum_Gi=sum(RNum_Gi))
+Toxin_ONT <- left_join(Toxin_ONT, Toxin, by="Toxin_confidence_level")
+# Virulence prediction
+Virulence_confidence_level <- c("-","1","2","3","4")
+virulence_prediction <- c("non-pathogenic","secreted","non-secreted","non-pathogenic","non-pathogenic")
+Virulence <- data.frame(Virulence_confidence_level,virulence_prediction)
+Virulence_ONT <- ONT %>% group_by(Virulence_confidence_level, Sample) %>% summarise(RNum_Gi=sum(RNum_Gi))
+Virulence_ONT <- left_join(Virulence_ONT, Virulence)
+Virulence_ONT <- Virulence_ONT %>% group_by(virulence_prediction, Sample) %>% summarise(RNum_Gi=sum(RNum_Gi))
+# AMR prediction
+AMR_ONT <- ONT %>% select(10,15,16)
+AMR_ONT$AMR <- ifelse(AMR_ONT$AMR_Category == "-", "-", "AMR")
+## Overall AMR
+AMR_ONT_overall <- AMR_ONT %>% group_by(AMR, Sample) %>% summarise(RNum_Gi=sum(RNum_Gi))
+## AMR category
+AMR_ONT_category <- AMR_ONT %>% group_by(AMR_Category, Sample) %>% summarise(RNum_Gi=sum(RNum_Gi))
+
+ONT <- read.csv("~/Documents/R_data/nanopore_ONT/pathofact_AMR/PathoFact_ONT.csv", row.names=1)
+# Toxin prediction
+Toxin_confidence_level <- c("-","1","2")
+Toxin_prediction <- c("non-pathogenic","secreted","non-secreted")
+Toxin <- data.frame(Toxin_confidence_level,Toxin_prediction)
+Toxin_ONT <- ONT %>% group_by(Toxin_confidence_level, Sample) %>% summarise(RNum_Gi=sum(RNum_Gi))
+Toxin_ONT <- left_join(Toxin_ONT, Toxin, by="Toxin_confidence_level")
+# Virulence prediction
+Virulence_confidence_level <- c("-","1","2","3","4")
+virulence_prediction <- c("non-pathogenic","secreted","non-secreted","non-pathogenic","non-pathogenic")
+Virulence <- data.frame(Virulence_confidence_level,virulence_prediction)
+Virulence_ONT <- ONT %>% group_by(Virulence_confidence_level, Sample) %>% summarise(RNum_Gi=sum(RNum_Gi))
+Virulence_ONT <- left_join(Virulence_ONT, Virulence)
+Virulence_ONT <- Virulence_ONT %>% group_by(virulence_prediction, Sample) %>% summarise(RNum_Gi=sum(RNum_Gi))
+# AMR prediction
+AMR_ONT <- ONT %>% select(10,15,16)
+AMR_ONT$AMR <- ifelse(AMR_ONT$AMR_Category == "-", "-", "AMR")
+## Overall AMR
+AMR_ONT_overall <- AMR_ONT %>% group_by(AMR, Sample) %>% summarise(RNum_Gi=sum(RNum_Gi))
+## AMR category
+AMR_ONT_category <- AMR_ONT %>% group_by(AMR_Category, Sample) %>% summarise(RNum_Gi=sum(RNum_Gi))
+
+###########
+# Figures #
+###########
+#FIGURES
+#set theme
+theme_set(theme_bw() +
+            theme(strip.background = element_rect(colour = "black", fill = "white"), strip.text = element_text(size=14, family="Arial")) +
+            theme(axis.text.x = element_text(angle = 90, size = 12, family = "Arial", hjust = 0.95),
+                  axis.text.y = element_text(size = 14, family = "Arial"),
+                  axis.title.y = element_text(size = 12, family = "Arial"),
+                  axis.title.x=element_text(size = 14, face = "bold", family = "Arial"),
+                  axis.ticks.x=element_blank()) +
+            theme(plot.title = element_text(hjust = 0.5)) +
+            theme(legend.text = element_text(size = 9, family = "Arial"),
+                  legend.title = element_text(size = 9, family = "Arial", face = "bold"), legend.direction = "vertical", legend.box = "vertical")) 
+# Toxin Figure
+Toxin_ONT_figure <- Toxin_ONT %>% filter(Toxin_prediction != "non-pathogenic")
+Toxin_plot <- Toxin_ONT_figure %>% ggplot(aes(x=Sample, y=RNum_Gi, fill = Sample)) +
+  geom_bar(stat = "identity") +
+  facet_grid(~ Toxin_prediction) +
+  xlab(element_blank()) +
+  ylab("relative abundance (RNum_Gi)") +
+  ggtitle("Toxin") +
+  theme(legend.position = "none") # out comment for legend
+# Virulence Figure
+Virulence_ONT_figure <- Virulence_ONT %>% filter(virulence_prediction != "non-pathogenic")
+Virulence_plot <- Virulence_ONT_figure %>% ggplot(aes(x=Sample, y=RNum_Gi, fill = Sample)) +
+  geom_bar(stat = "identity") +
+  facet_grid(~ virulence_prediction) +
+  xlab(element_blank()) +
+  ylab("relative abundance (RNum_Gi)") +
+  ggtitle("Virulence") +
+  theme(legend.position = "none")
+# AMR overall Figure
+AMR_ONT_overall_Figure <- AMR_ONT_overall %>% filter(AMR != "-")
+AMR_plot <- AMR_ONT_overall_Figure %>% ggplot(aes(x=Sample, y=RNum_Gi, fill = Sample)) +
+  geom_bar(stat = "identity") +
+  facet_grid(~ AMR) +
+  xlab(element_blank()) +
+  ylab("relative abundance (RNum_Gi)") +
+  ggtitle("AMR")
+
+# Combine figures
+library(cowplot)
+plot_grid(Virulence_plot, Toxin_plot, AMR_plot, nrow = 1, labels = "auto")
+
diff --git a/figures/src/fig_crispr.R b/figures/src/fig_crispr.R
new file mode 100644
index 0000000000000000000000000000000000000000..2085fac6c21a4ee0718ae717510699d66d61ac61
--- /dev/null
+++ b/figures/src/fig_crispr.R
@@ -0,0 +1,115 @@
+#!/usr/bin/Rscript
+
+## LOG FILE
+sink(file=file(snakemake@log[[1]], open="wt"), type="message")
+
+## NOTE
+# Plot CRISPR statistics
+
+## IMPORT
+suppressMessages(library(testit))
+# suppressMessages(library(UpSetR))
+suppressMessages(library(ggplot2))
+# custom
+source(snakemake@params$utils)
+
+## DATA
+stats <- read.csv(
+    file=snakemake@input$stats,
+    sep="\t",
+    header=TRUE,
+    stringsAsFactors=FALSE,
+    check.names=FALSE
+)
+# change names
+stats$crispr_tool <- CRISPR_TOOL_NAMES[stats$crispr_tool]
+stats$asm_tool    <- ASM_TOOL_NAMES[stats$asm_tool]
+# aggregate: spacers per contig
+# stats_aggr_sc <- aggregate(
+#     x=stats$spacers,
+#     by=list(seq_id=stats$seq_id, crispr_tool=stats$crispr_tool, asm_tool=stats$asm_tool),
+#     FUN=sum
+# )
+# aggregate: spacers per assembly
+stats_aggr_sa <- aggregate(
+    x=stats$spacers,
+    by=list(crispr_tool=stats$crispr_tool, asm_tool=stats$asm_tool),
+    FUN=sum
+)
+# aggregate: arrays per assembly
+stats_aggr_aa <- aggregate(
+    x=stats$seq_id,
+    by=list(crispr_tool=stats$crispr_tool, asm_tool=stats$asm_tool),
+    FUN=length
+)
+
+## PLOTS
+# UpSetR plotting function
+plot_upsetr <- function(df, asm_tool){
+    asm_sets <- lapply(
+        CRISPR_TOOL_NAMES,
+        function(x){ unique(unlist(df[df$asm_tool==asm_tool & df$crispr_tool == x,"seq_id"])) }
+    )
+    names(asm_sets) <- CRISPR_TOOL_NAMES[names(asm_sets)]
+    UpSetR::upset(
+        data=UpSetR::fromList(asm_sets),
+        order.by="degree",
+        decreasing=FALSE,
+        mainbar.y.label=sprintf("Contig intersection size (%s)", asm_tool),
+        sets.x.label="Contigs w/ CRISPR array(s)"
+    )
+}
+
+# Custom ggplot2 theme
+my_theme <-
+    theme_bw() +
+    theme(
+        # legend
+        legend.title=element_blank(),
+        # grid
+        panel.grid=element_blank(),
+        # strip
+        strip.background=element_rect(fill="white"),
+        strip.text=element_text(size=12, color="black"),
+        # axes
+        axis.title=element_text(size=12, color="black"),
+        axis.text.y=element_text(size=9, color="black"),
+        axis.text.x=element_text(size=9, color="black", angle=90, vjust=0.5, hjust=1)
+    )
+
+# List of ggplot plots
+plots <- list()
+
+# Plot: Number of spacers per tool/assembly
+plots$spacers_asm <-
+    ggplot(data=stats_aggr_sa, aes(x=crispr_tool, y=x, fill=asm_tool)) +
+    geom_col(position="dodge") +
+    scale_fill_manual(values=ASM_TOOL_COLORS$notmeth, guide="legend") +
+    labs(
+        x="",
+        y="Number of spacers"
+    ) +
+    my_theme
+
+# Plot: Number of arrays per tool/assembly
+plots$arrays_asm <-
+    ggplot(data=stats_aggr_aa, aes(x=crispr_tool, y=x, fill=asm_tool)) +
+    geom_col(position="dodge") +
+    scale_fill_manual(values=ASM_TOOL_COLORS$notmeth, guide="legend") +
+    labs(
+        x="",
+        y="Number of CRISPR arrays"
+    ) +
+    my_theme
+
+# Plot: UpSetR (+ PDF)
+# NOTE:
+#   For an unknown reason creating and saving multiple plots using a for-loop or sapply does not work - the PDF is empty.
+#   Creating a PDF per plot in a for-loop or using sapply does not work either - each PDF is empty.
+pdf(snakemake@output$pdf, width=snakemake@params$width, height=snakemake@params$height)
+for(pp in plots){ print(pp) }
+plot_upsetr(stats, "Flye")
+plot_upsetr(stats, "MEGAHIT")
+plot_upsetr(stats, "metaSPAdes")
+plot_upsetr(stats, "metaSPAdes (H)")
+dev.off()
diff --git a/figures/src/fig_mmseq_upsetr.R b/figures/src/fig_mmseq_upsetr.R
new file mode 100644
index 0000000000000000000000000000000000000000..43ad473a5bd4b3fa54026dc591da661b7ff2f115
--- /dev/null
+++ b/figures/src/fig_mmseq_upsetr.R
@@ -0,0 +1,53 @@
+#!/usr/bin/Rscript
+
+## LOG FILE
+sink(file=file(snakemake@log[[1]], open="wt"), type="message")
+
+## NOTE
+# UpSetR
+# https://cran.r-project.org/web/packages/UpSetR/vignettes/basic.usage.html
+
+## IMPORT
+# suppressMessages(library(testit))
+suppressMessages(library(UpSetR))
+# custom
+source(snakemake@params$utils)
+
+## DATA
+# overlap/intersection sizes
+overlap_sizes <- read.csv(
+    file=snakemake@input$overlap_sizes,
+    sep=" ",
+    header=FALSE,
+    stringsAsFactors=FALSE
+)
+# check: all expected tools are there
+# testit::assert(all(sapply(names(ASM_TOOL_NAMES), function(x){ x %in% overlap_sizes$V2 })))
+# process names
+for(tname in names(ASM_TOOL_NAMES)){
+    overlap_sizes$V2 <- sub(tname, ASM_TOOL_NAMES[tname], overlap_sizes$V2)
+}
+overlap_sizes$V2 <- gsub("_", "&", overlap_sizes$V2)
+# create input for UpSetR
+overlap_names <- overlap_sizes$V2
+overlap_sizes <- overlap_sizes$V1
+names(overlap_sizes) <- overlap_names
+
+## PLOT
+pdf(snakemake@output$pdf, width=snakemake@params$width, height=snakemake@params$height, onefile=FALSE)
+UpSetR::upset(
+    data=UpSetR::fromExpression(overlap_sizes),
+    # overlap order
+    order.by="degree",
+    decreasing=FALSE,
+    # colors
+    set.metadata=list(
+        data=data.frame(
+            sets=ASM_TOOL_NAMES_NOTMETH,
+            Tool=ASM_TOOL_NAMES_NOTMETH,
+            stringsAsFactors=FALSE
+        ), 
+        plots=list(list(type="matrix_rows", column="Tool", colors=ASM_TOOL_COLORS$notmeth, alpha=0.7))
+    )
+)
+dev.off()
diff --git a/figures/src/fig_nanostats.R b/figures/src/fig_nanostats.R
new file mode 100644
index 0000000000000000000000000000000000000000..a76a9d5ac3176fe3ce6e2fb2683728d55330d715
--- /dev/null
+++ b/figures/src/fig_nanostats.R
@@ -0,0 +1,62 @@
+#!/usr/bin/Rscript
+
+## LOG FILE
+sink(file=file(snakemake@log[[1]], open="wt"), type="message")
+
+## NOTE
+# Plot NanoStats statistics
+
+## IMPORT
+suppressMessages(library(testit))
+suppressMessages(library(ggplot2))
+suppressMessages(library(reshape2))
+# custom
+source(snakemake@params$utils)
+
+## DATA
+stats <- read.csv(
+    file=snakemake@input$stats,
+    sep="\t",
+    header=TRUE,
+    stringsAsFactors=FALSE,
+    check.names=FALSE
+)
+# change names
+stats$meth_flag <- METHYLATION_NAMES[stats$meth_flag]
+stats$sel_flag  <- RSEL_TOOL_NAMES[stats$sel_flag]
+# reshape
+stats_melted <- reshape2::melt(stats, id.vars=c("meth_flag", "sel_flag"))
+stats_melted$meth_flag <- factor(stats_melted$meth_flag, levels=METHYLATION_NAMES, ordered=TRUE)
+stats_melted$sel_flag  <- factor(stats_melted$sel_flag, levels=RSEL_TOOL_NAMES, ordered=TRUE)
+
+## PLOT
+stats_p <-
+    ggplot(data=stats_melted, aes(x=sel_flag, y=value, fill=meth_flag)) +
+    geom_col(position="dodge") +
+    scale_fill_manual(values=METHYLATION_COLORS$new, guide="legend") +
+    facet_wrap(vars(variable), ncol=1, scales="free_y") +
+    labs(
+        # title="",
+        # subtitle="",
+        x="",
+        y=""
+    ) +
+    theme_bw() +
+    theme(
+        # legend
+        legend.title=element_blank(),
+        # grid
+        panel.grid=element_blank(),
+        # strip
+        strip.background=element_rect(fill="white"),
+        strip.text=element_text(size=12, color="black"),
+        # axes
+        axis.title=element_text(size=12, color="black"),
+        axis.text.y=element_text(size=9, color="black"),
+        axis.text.x=element_text(size=9, color="black", angle=90, vjust=0.5, hjust=1)
+    )
+
+## PDF
+pdf(snakemake@output$pdf, width=snakemake@params$width, height=snakemake@params$height)
+print(stats_p)
+dev.off()
diff --git a/figures/src/fig_partial_genes.R b/figures/src/fig_partial_genes.R
new file mode 100644
index 0000000000000000000000000000000000000000..dda766ae21dc4774be853c9c6b234cac16456d5f
--- /dev/null
+++ b/figures/src/fig_partial_genes.R
@@ -0,0 +1,64 @@
+#!/usr/bin/Rscript
+
+## LOG FILE
+sink(file=file(snakemake@log[[1]], open="wt"), type="message")
+
+## NOTE
+# Plot protein/gene counts
+
+## IMPORT
+suppressMessages(library(testit))
+suppressMessages(library(ggplot2))
+suppressMessages(library(reshape2))
+# custom
+source(snakemake@params$utils)
+
+## DATA
+counts <- read.csv(
+    file=snakemake@input$counts,
+    sep="\t",
+    header=TRUE,
+    stringsAsFactors=FALSE,
+    check.names=FALSE
+)
+# change tool names
+counts$tool <- GENE_TOOL_NAMES[counts$tool]
+colnames(counts) <- sapply(colnames(counts), function(x){ ifelse(x %in% names(ASM_TOOL_NAMES), ASM_TOOL_NAMES[x], x) })
+# reshape
+counts_melted <- reshape2::melt(counts, id.vars="tool")
+# Gene tool name - as ordered factor
+counts_melted$tool <- factor(counts_melted$tool, levels=GENE_TOOL_NAMES, ordered=TRUE)
+# Gene tool name - short version
+counts_melted$tool_name <- sapply(as.character(counts_melted$tool), function(x){ unlist(strsplit(x, " "))[1] })
+
+## PLOT
+counts_p <-
+    ggplot(data=counts_melted, aes(x=tool, y=value, fill=variable)) +
+    geom_col(position = "dodge") +
+    scale_fill_manual(values=ASM_TOOL_COLORS$notmeth, guide="legend") +
+    facet_wrap(vars(tool_name), nrow=1, scales="free_x") +
+    labs(
+        # title="",
+        # subtitle="",
+        x="",
+        y="Number of proteins/genes"
+    ) +
+    theme_bw() +
+    theme(
+        # legend
+        legend.title=element_blank(),
+        # grid
+        panel.grid=element_blank(),
+        # strip
+        strip.background=element_rect(fill="white"),
+        strip.text=element_text(size=12, color="black"),
+        # axes
+        axis.title=element_text(size=12, color="black"),
+        axis.text.y=element_text(size=9, color="black"),
+        axis.text.x=element_text(size=9, color="black", angle=90, vjust=0.5, hjust=1)
+    )
+
+## PDF
+pdf(snakemake@output$pdf, width=snakemake@params$width, height=snakemake@params$height)
+print(counts_p)
+dev.off()
diff --git a/figures/src/fig_plasflow.R b/figures/src/fig_plasflow.R
new file mode 100644
index 0000000000000000000000000000000000000000..927ccce75255779cd20df47e920057ef6afd804f
--- /dev/null
+++ b/figures/src/fig_plasflow.R
@@ -0,0 +1,63 @@
+#!/usr/bin/Rscript
+
+## LOG FILE
+sink(file=file(snakemake@log[[1]], open="wt"), type="message")
+
+## NOTE
+# Plot PlasFlow statistics
+
+## IMPORT
+suppressMessages(library(testit))
+suppressMessages(library(UpSetR))
+suppressMessages(library(ggplot2))
+suppressMessages(library(reshape2))
+# custom
+source(snakemake@params$utils)
+
+## DATA
+stats <- read.csv(
+    file=snakemake@input$stats,
+    sep="\t",
+    header=TRUE,
+    stringsAsFactors=FALSE,
+    check.names=FALSE
+)
+# change names
+stats$tool <- ASM_TOOL_NAMES[stats$tool]
+# reshape
+stats_melted <- reshape2::melt(stats, id.vars=c("label", "type", "tool"), variable.name="statstype")
+
+## PLOT
+plots <- list()
+dtype <- "class"
+for(stype in unique(stats_melted$statstype)){
+    plots[[sprintf("%s_%s", dtype, stype)]] <-
+        ggplot(
+            data=stats_melted[stats_melted$statstype == stype & stats_melted$type == dtype,],
+            aes_string(x="tool", y="value", fill="label")
+        ) +
+        geom_col(position="dodge") +
+        scale_fill_manual(values=PLASFLOW_COLORS[[dtype]], guide="legend") +
+        labs(
+            # title="",
+            # subtitle="",
+            x="",
+            y=PLASFLOW_NAMES$statstype[stype]
+        ) +
+        theme_bw() +
+        theme(
+            # legend
+            legend.title=element_blank(),
+            # grid
+            panel.grid=element_blank(),
+            # axes
+            axis.title=element_text(size=12, color="black"),
+            axis.text.y=element_text(size=9, color="black"),
+            axis.text.x=element_text(size=9, color="black", angle=90, vjust=0.5, hjust=1)
+        )
+}
+
+## PDF
+pdf(snakemake@output$pdf, width=snakemake@params$width, height=snakemake@params$height)
+for(pp in plots){ print(pp) }
+dev.off()
diff --git a/figures/src/fig_quast.R b/figures/src/fig_quast.R
new file mode 100644
index 0000000000000000000000000000000000000000..e6bbdad55ef5d1cdf77d1888ac25f90565efe02f
--- /dev/null
+++ b/figures/src/fig_quast.R
@@ -0,0 +1,66 @@
+#!/usr/bin/Rscript
+
+## LOG FILE
+sink(file=file(snakemake@log[[1]], open="wt"), type="message")
+
+## NOTE
+# Plot QUAST statistics
+
+## IMPORT
+suppressMessages(library(testit))
+suppressMessages(library(ggplot2))
+suppressMessages(library(reshape2))
+# custom
+source(snakemake@params$utils)
+
+## DATA
+stats <- read.csv(
+    file=snakemake@input$stats,
+    sep="\t",
+    header=TRUE,
+    stringsAsFactors=FALSE,
+    check.names=FALSE
+)
+# change tool names
+colnames(stats) <- sapply(colnames(stats), function(x){ ifelse(x %in% names(ASM_TOOL_NAMES), ASM_TOOL_NAMES[x], x) })
+# stat.s to plot
+stats_vars <- c(
+    "# contigs",
+    "Largest contig",            
+    "Total length",
+    "N50",
+    "L50",                       
+    "# N's per 100 kbp"
+)
+# reshape
+stats_melted <- reshape2::melt(stats, id.vars="Assembly")
+
+## PLOT
+stats_p <-
+    ggplot(data=stats_melted[sapply(stats$Assembly, function(x){ x %in% stats_vars }),], aes(x=variable, y=value)) +
+    geom_col(aes(fill=variable)) +
+    scale_fill_manual(values=c(ASM_TOOL_COLORS$notmeth, ASM_TOOL_COLORS$meth), guide=NULL) +
+    facet_wrap(vars(Assembly), ncol=1, scales="free_y") +
+    labs(
+        # title="",
+        # subtitle="",
+        x="",
+        y="QUAST statistic"
+    ) +
+    theme_bw() +
+    theme(
+        # grid
+        panel.grid=element_blank(),
+        # strip
+        strip.background=element_rect(fill="white"),
+        strip.text=element_text(size=12, color="black"),
+        # axes
+        axis.title=element_text(size=12, color="black"),
+        axis.text.y=element_text(size=9, color="black"),
+        axis.text.x=element_text(size=9, color="black", angle=90, vjust=0.5, hjust=1)
+    )
+
+## PDF
+pdf(snakemake@output$pdf, width=snakemake@params$width, height=snakemake@params$height)
+print(stats_p)
+dev.off()
diff --git a/figures/src/fig_rgi.R b/figures/src/fig_rgi.R
new file mode 100644
index 0000000000000000000000000000000000000000..eb9cf47d8cd0dc2da93a22026eba7af46a32f7f8
--- /dev/null
+++ b/figures/src/fig_rgi.R
@@ -0,0 +1,115 @@
+#!/usr/bin/Rscript
+
+## LOG FILE
+sink(file=file(snakemake@log[[1]], open="wt"), type="message")
+
+## NOTE
+# Plot RGI statistics
+
+## IMPORT
+suppressMessages(library(testit))
+suppressMessages(library(UpSetR))
+suppressMessages(library(ggplot2))
+suppressMessages(library(reshape2))
+suppressMessages(library(UpSetR))
+# custom
+source(snakemake@params$utils)
+
+## DATA
+stats <- read.csv(
+    file=snakemake@input$stats,
+    sep="\t",
+    header=TRUE,
+    stringsAsFactors=FALSE,
+    check.names=FALSE
+)
+# change names
+colnames(stats) <- sapply(colnames(stats), function(x){ ifelse(x %in% names(ASM_TOOL_NAMES), ASM_TOOL_NAMES[x], x) })
+# reshape
+stats_melted <- reshape2::melt(stats, id.vars=c("label", "col", "type"), variable.name="tool", value.name="count")
+# change names
+# stats_melted$tool <- ASM_TOOL_NAMES[as.character(stats_melted$tool)]
+
+## PLOT: counts
+my_theme <- 
+    theme_bw() +
+    theme(
+        # grid
+        panel.grid=element_blank(),
+        # strip
+        strip.background=element_rect(fill="white"),
+        strip.text=element_text(size=9, color="black"),
+        # axes
+        axis.title=element_text(size=12, color="black"),
+        axis.text.y=element_text(size=9, color="black"),
+        axis.text.x=element_text(size=9, color="black", angle=90, vjust=0.5, hjust=1)
+    )
+
+plots <- list()
+for(ctype in unique(stats_melted$type)){
+    # Counts per feature type
+    for(col in unique(stats_melted$col)){
+        df <- stats_melted[stats_melted$type == ctype & stats_melted$col == col,]
+        plots[[sprintf("%s_%s", ctype, col)]] <-
+            ggplot(data=df, aes(x=tool, y=count, fill=tool)) +
+            geom_col() +
+            scale_fill_manual(values=ASM_TOOL_COLORS$notmeth, guide=NULL) +
+            facet_wrap(vars(label), ncol=round(sqrt(length(unique(df$label)))), scales="free_y") +
+            labs(
+                # title="",
+                subtitle=sprintf("Hits: %s", ctype),
+                x="",
+                y=RGI_NAMES$col[col]
+            ) +
+            my_theme
+    }
+}
+# Total counts
+df <- stats_melted[stats_melted$col == "ARO",]
+df <- aggregate(df$count, by=list(tool=df$tool, type=df$type), FUN=sum)
+plots[[sprintf("%s_%s", ctype, "total")]] <-
+    ggplot(data=df, aes(x=tool, y=x, fill=tool)) +
+    geom_col() +
+    scale_fill_manual(values=ASM_TOOL_COLORS$notmeth, guide=NULL) +
+    facet_wrap(vars(type), ncol=3) +
+    labs(
+        x="",
+        y="Total hits"
+    ) +
+    my_theme
+
+## PLOT: intersection &  PDF
+plot_overlap <- function(df, ctype, col){
+    df <- df[df$type == ctype & df$col == col,]
+    df_list <- lapply(ASM_TOOL_NAMES_NOTMETH, function(x){ df[df[,x] > 0,"label"] })
+    names(df_list) <- ASM_TOOL_NAMES_NOTMETH[names(df_list)]
+    UpSetR::upset(
+        data=UpSetR::fromList(df_list),
+        # overlap order
+        order.by="degree",
+        decreasing=FALSE,
+        # y-label title
+        mainbar.y.label=sprintf("Intersection size (%s hits, %s)", ctype, RGI_NAMES$col[col]),
+        # text size
+        text.scale = c(2, 2, 1.5, 1.5, 2, 2),
+        # colors
+        set.metadata=list(
+            data=data.frame(
+                sets=names(df_list),
+                Tool=names(df_list),
+                stringsAsFactors=FALSE
+            ), 
+            plots=list(list(type="matrix_rows", column="Tool", colors=ASM_TOOL_COLORS$notmeth, alpha=0.7))
+        )
+    )
+}
+
+
+pdf(snakemake@output$pdf, width=snakemake@params$width, height=snakemake@params$height)
+# Count plots
+for(pp in plots){ print(pp) }
+# Intersection plot
+plot_overlap(stats, "all", "ARO")
+plot_overlap(stats, "nudged", "ARO")
+plot_overlap(stats, "strict", "ARO")
+dev.off()
diff --git a/figures/src/utils.R b/figures/src/utils.R
new file mode 100644
index 0000000000000000000000000000000000000000..eb191361b41ba75f239d6a87e9136ae15dbc358c
--- /dev/null
+++ b/figures/src/utils.R
@@ -0,0 +1,103 @@
+#!/usr/bin/Rscript
+
+suppressMessages(library(ggsci)) # colors
+
+##############################
+# Assemblers
+# how the tool names should be changed
+ASM_TOOL_NAMES_NOTMETH <- c( # IMPORTANT: the order matters !!!
+    # not methylation-aware
+    "flye"="Flye",
+    "megahit"="MEGAHIT",
+    "metaspades_hybrid"="metaSPAdes (H)",
+    "metaspades"="metaSPAdes"
+)
+ASM_TOOL_NAMES_METH <- c( # IMPORTANT: the order matters !!!
+    # methylation-aware
+    "meth_flye"="Flye (M)",
+    "meth_megahit"="MEGAHIT (M)",
+    "meth_metaspades_hybrid"="metaSPAdes (M, H)",
+    "meth_metaspades"="metaSPAdes (M)"
+)
+ASM_TOOL_NAMES <- c( # IMPORTANT: the order matters !!!
+    ASM_TOOL_NAMES_METH,
+    ASM_TOOL_NAMES_NOTMETH
+)
+# tool colors
+ASM_TOOL_COLORS <- list()
+# methylation-aware
+ASM_TOOL_COLORS$meth    <- ggsci::pal_nejm("default", alpha=1)(4)
+names(ASM_TOOL_COLORS$meth) <- ASM_TOOL_NAMES_METH
+ASM_TOOL_COLORS$meth_raw <- ASM_TOOL_COLORS$meth
+names(ASM_TOOL_COLORS$meth_raw) <- names(ASM_TOOL_NAMES_METH)
+# not methylation-aware
+ASM_TOOL_COLORS$notmeth <- ggsci::pal_nejm("default", alpha=0.7)(4)
+names(ASM_TOOL_COLORS$notmeth) <- ASM_TOOL_NAMES_NOTMETH
+ASM_TOOL_COLORS$notmeth_raw <- ASM_TOOL_COLORS$notmeth
+names(ASM_TOOL_COLORS$notmeth_raw) <- names(ASM_TOOL_NAMES_NOTMETH)
+
+##############################
+# Gene tools
+GENE_TOOL_NAMES <- c(
+    "prodigal_partial"="Prodigal (partial)",
+    "prodigal_total"="Prodigal (total)",
+    "cdhit_unique"="CD-HIT (unique)",
+    "cdhit_total"="CD-HIT (total)"
+)
+
+##############################
+# Read selection methods
+RSEL_TOOL_NAMES <- c(
+    "size_selected"="size selected",
+    "gtube"="g-TUBE",
+    "merged"="merged"
+)
+
+##############################
+# Methylation flag
+METHYLATION_NAMES <- c(
+    "M"="Methylation aware",
+    "N"="Non-methylation aware"
+)
+
+METHYLATION_COLORS <- list(
+    raw=ggsci::pal_nejm("default", alpha=1)(6)[c(5,6)]
+)
+METHYLATION_COLORS$new <- METHYLATION_COLORS$raw
+names(METHYLATION_COLORS$raw) <- names(METHYLATION_NAMES)
+names(METHYLATION_COLORS$new) <- METHYLATION_NAMES
+
+##############################
+# CRISPR tools
+CRISPR_TOOL_NAMES <- c(
+    "minced"="MinCED",
+    "casc"="CasC"
+)
+
+##############################
+# PlasFlow
+PLASFLOW_NAMES <- list(
+    statstype=c(
+        count="Sequence count",
+        sum="Cumulative sequence length [bp]",
+        count_pct="Sequence count [%]",
+        sum_pct="Cumulative sequence length [%]"
+    )
+)
+
+PLASFLOW_COLORS <- list(
+    class=ggsci::pal_nejm("default", alpha=1)(4)[c(2,3,4)]
+)
+names(PLASFLOW_COLORS$class) <- c("chromosome", "plasmid", "unclassified")
+
+##############################
+# RGI
+RGI_NAMES <- list(
+    col=c(
+        "Best_Hit_ARO"="ARO term",
+        "ARO"="ARO",
+        "Drug Class"="Drug class",       
+        "Resistance Mechanism"="Resistance mechanism",
+        "AMR Gene Family"="Gene family"
+    )
+)
\ No newline at end of file
diff --git a/figures/src/utils.py b/figures/src/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a71dcf17fbc83f1f23431a8699fb2116f21d173
--- /dev/null
+++ b/figures/src/utils.py
@@ -0,0 +1,79 @@
+#!/usr/bin/python
+
+def parse_casc_report(ifile_path):
+    import pandas
+    # read in
+    summary = pandas.read_csv(ifile_path, sep="\t", header=0, usecols=["#seq_id", "spacers", "array_start", "array_stop", "bonafide"], comment=None)
+    # rename columns
+    summary.rename(columns={"#seq_id": "seq_id"}, inplace=True)
+    # filter
+    summary = summary[summary["bonafide"]]
+    assert all(summary["bonafide"]) # just a check
+    # drop columns
+    summary.drop(columns=["bonafide"], inplace=True)
+    return summary
+
+def parse_minced_report(ifile_path):
+    """
+    Parse the MINCED report: for each sequence and found CRISPR array, extract
+    - array start and end
+    - number of spacers = number of unique spacer sequences
+
+    Entry (sequence/CRISPR array) format:
+        Sequence '<string>' (<int> bp)
+
+        CRISPR <int>   Range: <int> - <int>
+        POSITION        REPEAT                                  SPACER
+        --------        ------------------------------------    ------------------------------
+        <int>           <string: ACTG>                          <string: ACTG>                 [ <int>, <int> ]
+        <...>
+        <int>           <string: ACTG>    
+        --------        ------------------------------------    ------------------------------
+        Repeats: <int>      Average Length: <int>              Average Length: <int>
+
+        CRISPR <int>   Range: <int> - <int>
+        <...>
+
+        Time to find repeats: <int> ms
+    """
+    import re
+    summary = []
+    with open(ifile_path, "r") as ifile:
+        new_seq = 0
+        seq_id = None
+        for line in ifile:
+            if re.match("Sequence ", line):
+                assert new_seq == 0, "Unexpected parsing error in {} in {}".format(line, ifile_path)
+                line_re = re.search(re.compile("^Sequence '(?P<seq_id>[\w\.]+)' \((?P<seq_len>\d+) bp\)$"), line)
+                assert line_re, "Could not extract info from \"{}\" in {}".format(line, ifile_path)
+                seq_id = line_re.group("seq_id")
+                new_seq = 1
+            elif re.match("CRISPR ", line):
+                assert new_seq == 1, "No seq before {} in {}".format(line, ifile_path)
+                line_re = re.search(re.compile("^CRISPR (?P<crispr_id>\d+)\s+Range: (?P<array_start>\d+) - (?P<array_stop>\d+)$"), line)
+                assert line_re, "Could not extract info from \"{}\" in {}".format(line, ifile_path)
+                summary.append({
+                    "seq_id": seq_id,
+                    "array_start": int(line_re.group("array_start")),
+                    "array_stop": int(line_re.group("array_stop")),
+                    "spacers": set()
+                })
+                new_seq += 1
+            elif re.match("POSITION", line):
+                assert new_seq == 2, "No new seq/CRISPR before {} in {}".format(line, ifile_path)
+                new_seq += 1
+            elif re.match("[0-9]+\s+", line):
+                assert re.match("[0-9]+\s+[ACTG]+", line), "Unexpected repeat/spacer format in {} in {}".format(line, ifile_path)
+                assert new_seq == 3, "No new seq./CRISPR before {} in {}".format(line, ifile_path)
+                if re.match("[0-9]+\s+[ACTG]+\s+[ACTG]+\s+", line):
+                    line_re = re.search(re.compile("^[0-9]+\s+(?P<repeat>[ACTG]+)\s+(?P<spacer>[ACTG]+)\s+.*$"), line)
+                    assert line_re, "Could not extract repeat/spacer from \"{}\" in {}".format(line, ifile_path)
+                    summary[len(summary)-1]["spacers"].add(line_re.group("spacer"))
+            elif re.match("Repeats:", line):
+                new_seq = 1
+                summary[len(summary)-1]["spacers"] = len(summary[len(summary)-1]["spacers"])
+            elif re.match("Time to find repeats", line):
+                assert new_seq == 1 and summary[len(summary)-1]["spacers"] > 0, "No new seq./CRISPR/spacers before {} in {}".format(line, ifile_path)
+                new_seq = 0
+                seq_id = None
+    return summary
diff --git a/figures/src_SBB/2019_GDB_partial_genes.R b/figures/src_SBB/2019_GDB_partial_genes.R
new file mode 100644
index 0000000000000000000000000000000000000000..29035438973ca5d65cd7350263ad30b10fb6ebf1
--- /dev/null
+++ b/figures/src_SBB/2019_GDB_partial_genes.R
@@ -0,0 +1,33 @@
+setwd("~/Documents/R_data/nanopore_ONT/")
+library(tidyverse)
+partial <- read.table("2019_GDB_protein_partial_genes.txt", header = TRUE)
+
+# pivoting the dataset
+long_merged <- pivot_longer(partial, cols=c(-tool), names_to = "group", values_to = "counts")
+
+# setting a common theme
+theme_set(theme_bw() +
+            theme(strip.background = element_rect(colour = "black", fill = "white"), strip.text = element_text(size=17, family="Helvetica")) +
+            theme(axis.text.x = element_text(angle = 90, size = 18, family = "Helvetica", face = "bold", color = "black"),
+                  axis.text.y = element_text(size = 18, family = "Helvetica"),
+                  axis.title.y = element_text(size = 18, face = "bold", family = "Helvetica"),
+                  axis.title.x=element_text(size = 18, face = "bold", family = "Helvetica"),
+                  axis.ticks.x=element_blank()) +
+            theme(plot.title = element_text(hjust = 0.5)) +
+            theme(legend.text = element_text(size = 18, family = "Helvetica"),
+                  legend.title = element_text(size = 18, family = "Helvetica", face = "bold"), legend.direction = "vertical", legend.box = "vertical")) 
+
+
+# Basic plots
+install.packages("devtools")
+library(devtools)
+install_github("kassambara/easyGgplot2")
+library(easyGgplot2)
+data_long <- long_merged
+
+ggplot(data_long) + 
+geom_bar(aes(fill=group, y=counts, x=tool), stat = "identity",  position=position_dodge()) + scale_fill_manual("assembly", values = c("metaspades" = "grey", "metaspades_hybrid" = "dodgerblue2"))
+
+
+ggplot2.barplot(data=data_long, xName='tool', yName='counts',
+                groupName='group', position=position_dodge())
diff --git a/figures/src_SBB/MAGS_2019_GDB_analyses.R b/figures/src_SBB/MAGS_2019_GDB_analyses.R
new file mode 100644
index 0000000000000000000000000000000000000000..253961e374b7e3e4418d6d650fc2baa8bfe62357
--- /dev/null
+++ b/figures/src_SBB/MAGS_2019_GDB_analyses.R
@@ -0,0 +1,133 @@
+# Assessing the number and kind of bins found due to the different assembly and mapping methods used for the 2019_GDB ONT analsyes
+
+setwd("~/Documents/R_data/nanopore_ONT/")
+library(tidyverse)
+
+# importing files and renaming columns
+bwa_lr_MH <- read.table("bwa_lr_metaspades_hybrid_HQ_bins.txt", header = FALSE) %>% rename(bin = V1, taxonomy = V2, completion = V3, contamination = V4, strain_heterogeneity = V5) 
+bwa_sr_MH <- read.table("bwa_sr_metaspades_hybrid_HQ_bins.txt", header = FALSE) %>% rename(bin = V1, taxonomy = V2, completion = V3, contamination = V4, strain_heterogeneity = V5)
+bwa_merged_MH <- read.table("bwa_merged_metaspades_hybrid_HQ_bins.txt", header = FALSE) %>% rename(bin = V1, taxonomy = V2, completion = V3, contamination = V4, strain_heterogeneity = V5)
+mmi_lr_MH <- read.table("mmi_lr_metaspades_hybrid_HQ_bins.txt", header = FALSE) %>% rename(bin = V1, taxonomy = V2, completion = V3, contamination = V4, strain_heterogeneity = V5)
+mmi_sr_MH <- read.table("mmi_sr_metaspades_hybrid_HQ_bins.txt", header = FALSE) %>% rename(bin = V1, taxonomy = V2, completion = V3, contamination = V4, strain_heterogeneity = V5)
+mmi_merged_MH <- read.table("mmi_merged_metaspades_hybrid_HQ_bins.txt", header = FALSE) %>% rename(bin = V1, taxonomy = V2, completion = V3, contamination = V4, strain_heterogeneity = V5)
+flye <- read.table("flye_HQ_bins.txt", header = FALSE) %>% rename(bin = V1, taxonomy = V2, completion = V3, contamination = V4, strain_heterogeneity = V5)
+megahit <- read.table("megahit_HQ_bins.txt", header = FALSE) %>% rename(bin = V1, taxonomy = V2, completion = V3, contamination = V4, strain_heterogeneity = V5)
+
+# removing the "bin" column and separating taxonomy into individual columns
+bwa_lr_MH_edited <- bwa_lr_MH %>% select(-bin) %>%
+  separate(taxonomy, c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"), sep = ";", remove = FALSE) 
+bwa_sr_MH_edited <- bwa_sr_MH %>% select(-bin) %>%
+  separate(taxonomy, c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"), sep = ";", remove = FALSE) 
+bwa_merged_MH_edited <- bwa_merged_MH %>% select(-bin) %>%
+  separate(taxonomy, c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"), sep = ";", remove = FALSE) 
+mmi_lr_MH_edited <- mmi_lr_MH %>% select(-bin) %>%
+  separate(taxonomy, c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"), sep = ";", remove = FALSE) 
+mmi_sr_MH_edited <- mmi_sr_MH %>% select(-bin) %>%
+  separate(taxonomy, c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"), sep = ";", remove = FALSE) 
+mmi_merged_MH_edited <- mmi_merged_MH %>% select(-bin) %>%
+  separate(taxonomy, c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"), sep = ";", remove = FALSE) 
+flye_edited <- flye %>% select(-bin) %>%
+  separate(taxonomy, c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"), sep = ";", remove = FALSE) 
+megahit_edited <- megahit %>% select(-bin) %>%
+  separate(taxonomy, c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"), sep = ";", remove = FALSE) 
+
+
+# Keeping only the Family and Genus columns and adding family if the latter is missing
+bwa_lr_tax <- bwa_lr_MH_edited %>% select(Family, Genus, completion, contamination, strain_heterogeneity) %>% mutate(Genus = gsub("g__", "", Genus), Family = gsub("f__", "", Family)) %>% mutate_if(is.character, list(~na_if(.,""))) 
+
+bwa_sr_tax <- bwa_sr_MH_edited %>% select(Family, Genus, completion, contamination, strain_heterogeneity) %>% mutate(Genus = gsub("g__", "", Genus), Family = gsub("f__", "", Family)) %>% mutate_if(is.character, list(~na_if(.,""))) 
+
+bwa_merged_tax <- bwa_merged_MH_edited %>% select(Family, Genus, completion, contamination, strain_heterogeneity) %>% mutate(Genus = gsub("g__", "", Genus), Family = gsub("f__", "", Family)) %>% mutate_if(is.character, list(~na_if(.,""))) 
+
+mmi_lr_tax <- mmi_lr_MH_edited %>% select(Family, Genus, completion, contamination, strain_heterogeneity) %>% mutate(Genus = gsub("g__", "", Genus), Family = gsub("f__", "", Family)) %>% mutate_if(is.character, list(~na_if(.,""))) 
+
+mmi_sr_tax <- mmi_sr_MH_edited %>% select(Family, Genus, completion, contamination, strain_heterogeneity) %>% mutate(Genus = gsub("g__", "", Genus), Family = gsub("f__", "", Family)) %>% mutate_if(is.character, list(~na_if(.,""))) 
+
+mmi_merged_tax <- mmi_merged_MH_edited %>% select(Family, Genus, completion, contamination, strain_heterogeneity) %>% mutate(Genus = gsub("g__", "", Genus), Family = gsub("f__", "", Family)) %>% mutate_if(is.character, list(~na_if(.,""))) 
+
+flye_tax  <- flye_edited %>% select(Family, Genus, completion, contamination, strain_heterogeneity) %>% mutate(Genus = gsub("g__", "", Genus), Family = gsub("f__", "", Family)) %>% mutate_if(is.character, list(~na_if(.,""))) 
+
+megahit_tax <- megahit_edited %>% select(Family, Genus, completion, contamination, strain_heterogeneity) %>% mutate(Genus = gsub("g__", "", Genus), Family = gsub("f__", "", Family)) %>% mutate_if(is.character, list(~na_if(.,""))) 
+
+# code replacing NAs in 'Genus' with the values from adjacent column 'Family'
+bwa_lr_tax$Genus[is.na(bwa_lr_tax$Genus)] <- bwa_lr_tax$Family[is.na(bwa_lr_tax$Genus)]
+bwa_sr_tax$Genus[is.na(bwa_sr_tax$Genus)] <- bwa_sr_tax$Family[is.na(bwa_sr_tax$Genus)]
+bwa_merged_tax$Genus[is.na(bwa_merged_tax$Genus)] <- bwa_merged_tax$Family[is.na(bwa_merged_tax$Genus)]
+mmi_lr_tax$Genus[is.na(mmi_lr_tax$Genus)] <- mmi_lr_tax$Family[is.na(mmi_lr_tax$Genus)]
+mmi_sr_tax$Genus[is.na(mmi_sr_tax$Genus)] <- mmi_sr_tax$Family[is.na(mmi_sr_tax$Genus)]
+mmi_merged_tax$Genus[is.na(mmi_merged_tax$Genus)] <- mmi_merged_tax$Family[is.na(mmi_merged_tax$Genus)]
+flye_tax$Genus[is.na(flye_tax$Genus)] <- flye_tax$Family[is.na(flye_tax$Genus)]
+megahit_tax$Genus[is.na(megahit_tax$Genus)] <- megahit_tax$Family[is.na(megahit_tax$Genus)]
+
+# making the Genus names unique for easier tracking and merging downstream
+bwa_lr_tax <- bwa_lr_tax %>% mutate(Genus = make.unique(Genus)) %>%  select(-Family)
+bwa_sr_tax <- bwa_sr_tax %>% mutate(Genus = make.unique(Genus)) %>%  select(-Family)
+bwa_merged_tax <- bwa_merged_tax %>% mutate(Genus = make.unique(Genus)) %>%  select(-Family)
+mmi_lr_tax <- mmi_lr_tax %>% mutate(Genus = make.unique(Genus)) %>%  select(-Family)
+mmi_sr_tax <- mmi_sr_tax %>% mutate(Genus = make.unique(Genus)) %>%  select(-Family)
+mmi_merged_tax <- mmi_merged_tax %>% mutate(Genus = make.unique(Genus)) %>%  select(-Family)
+flye_tax <- flye_tax %>% mutate(Genus = make.unique(Genus)) %>%  select(-Family)
+megahit_tax <- megahit_tax %>% mutate(Genus = make.unique(Genus)) %>%  select(-Family)
+
+# combining all samples based on the Genus, and making additional column with sample name
+bwa_lr_final <- bwa_lr_tax %>% mutate(Genus = make.unique(Genus)) %>%  select(Genus) %>% mutate(bwa_lr = Genus)
+bwa_sr_final <- bwa_sr_tax %>% mutate(Genus = make.unique(Genus)) %>%  select(Genus) %>% mutate(bwa_sr = Genus)
+bwa_merged_final <- bwa_merged_tax %>% mutate(Genus = make.unique(Genus)) %>%  select(Genus) %>% mutate(bwa_merged = Genus)
+mmi_lr_final <- mmi_lr_tax %>% mutate(Genus = make.unique(Genus)) %>%  select(Genus) %>% mutate(mmi_lr = Genus)
+mmi_sr_final <- mmi_sr_tax %>% mutate(Genus = make.unique(Genus)) %>%  select(Genus) %>% mutate(mmi_sr = Genus)
+mmi_merged_final <- mmi_merged_tax %>% mutate(Genus = make.unique(Genus)) %>%  select(Genus) %>% mutate(mmi_merged = Genus)
+flye_final <- flye_tax %>% mutate(Genus = make.unique(Genus)) %>%  select(Genus) %>% mutate(flye = Genus)
+megahit_final<- megahit_tax %>% mutate(Genus = make.unique(Genus)) %>%  select(Genus) %>% mutate(megahit = Genus)
+
+merged_2019_tax <- full_join(x = bwa_lr_final, y = bwa_sr_final,
+                         by = "Genus") %>%
+  full_join( x = ., y = bwa_merged_final, by = "Genus") %>%
+  full_join( x = ., y = mmi_lr_final, by = "Genus") %>%
+  full_join( x = ., y = mmi_sr_final, by = "Genus") %>%
+  full_join( x = ., y = mmi_merged_final, by = "Genus") %>%
+  full_join( x = ., y = flye_final, by = "Genus")%>%
+  full_join( x = ., y = megahit_final, by = "Genus") 
+# %>% arrange(Genus) %>% replace(is.na(.), 0)
+
+long_tax <- pivot_longer(merged_2019_tax, cols = bwa_lr:megahit, names_to = "samples")
+
+# setting a common theme
+theme_set(theme_bw() +
+            theme(strip.background = element_rect(colour = "black", fill = "white"), strip.text = element_text(size=17, family="Helvetica")) +
+            theme(axis.text.x = element_text(angle = 90, size = 18, family = "Helvetica"),
+                  axis.text.y = element_text(size = 18, family = "Helvetica"),
+                  axis.title.y = element_text(size = 18, face = "bold", family = "Helvetica"),
+                  axis.title.x=element_text(size = 18, face = "bold", family = "Helvetica"),
+                  axis.ticks.x=element_blank()) +
+            theme(plot.title = element_text(hjust = 0.5)) +
+            theme(legend.text = element_text(size = 18, family = "Helvetica"),
+                  legend.title = element_text(size = 18, family = "Helvetica", face = "bold"), legend.direction = "vertical", legend.box = "vertical")) 
+
+# plotting
+ggplot(long_tax, aes(x = samples, y = value))
+long_tax  <- long_tax %>% drop_na()
+
+long_tax %>% 
+ # group_by(Genus) %>%
+ # mutate(rescale = scales::rescale(Sqrt.abundance)) %>%
+  ggplot(., aes(x = factor(samples), y = value)) +
+  geom_tile(aes(fill = Genus), color = "white") +
+#  scale_alpha(range = c(0.1, 1)) +
+  theme(legend.position = "none") +
+  #  xlab(label = "Library Preparation Method") +
+  facet_grid(~ samples, switch = "x", scales = "free_x", space = "free_x") +
+  # scale_fill_gradient(name = "Sqrt(Abundance)",
+  #                     low = "#FFFFFF",
+  #                     high = "#012345") +
+  theme(strip.placement = "outside") +
+  theme(axis.text.x = element_blank(),
+        axis.text.y = element_text(color = scales::hue_pal()(length(levels(as.factor(long_tax$Genus)))))) +
+  ggtitle(label = "2019_GDB - MAGs")
+
+# Summary table
+View(merged_2019_tax)
+
+ggplot(data.frame(long_tax), aes(x=samples, fill=Genus)) + 
+  geom_bar(position = "fill") 
+#+   scale_fill_brewer(palette = "Dark2")
+
diff --git a/figures/src_SBB/mappability_index-2020-05-26.R b/figures/src_SBB/mappability_index-2020-05-26.R
new file mode 100644
index 0000000000000000000000000000000000000000..c90bca4a718785ed073b54633d00510949da6030
--- /dev/null
+++ b/figures/src_SBB/mappability_index-2020-05-26.R
@@ -0,0 +1,144 @@
+setwd("~/Documents/R_data/nanopore_ONT/")
+library(tidyverse)
+
+map_1000 <- read.delim("mappability_over_1000bp.txt") %>% separate(sample, c("sample", "mapped_reads", "total", "percent_mapped"), sep = " ", remove = FALSE) %>% mutate(sample = str_replace_all(sample, "_counts.txt", ""))
+map_1000$mapped_reads = NULL
+map_1000$total = NULL
+
+map_1500 <- read.delim("mappability_over_1500bp.txt") %>% separate(sample, c("sample", "mapped_reads", "total", "percent_mapped"), sep = " ", remove = FALSE) %>% mutate(sample = str_replace_all(sample, "_counts.txt", ""))
+map_1500$mapped_reads = NULL
+map_1500$total = NULL
+
+map_2000 <- read.delim("mappability_over_2000bp.txt") %>% separate(sample, c("sample", "mapped_reads", "total", "percent_mapped"), sep = " ", remove = FALSE) %>% mutate(sample = str_replace_all(sample, "_counts.txt", ""))
+map_2000$mapped_reads = NULL
+map_2000$total = NULL
+
+map_5000 <- read.delim("mappability_over_5000bp.txt") %>% separate(sample, c("sample", "mapped_reads", "total", "percent_mapped"), sep = " ", remove = FALSE) %>% mutate(sample = str_replace_all(sample, "_counts.txt", ""))
+map_5000$mapped_reads = NULL
+map_5000$total = NULL
+
+# merging all the files
+merged_mappability <- full_join(x = map_1000, y = map_1500,by = "sample") %>%
+  full_join( x = ., y = map_2000, by = "sample") %>%
+  full_join( x = ., y = map_5000, by = "sample")
+
+columns <- c("samples", "bp1000", "bp1500", "bp2000", "bp5000")
+colnames(merged_mappability) <- columns
+write.csv(merged_mappability, "merged_mappability_index.csv", quote = FALSE)
+
+# long format
+map_long <- pivot_longer(merged_mappability, cols = bp1000:bp5000, names_to = "cutoff", values_to = "count") %>% mutate(samples = str_replace_all(samples, "_counts.txt", ""))
+
+samps <- c("flye", "megahit", "hybrid_bwa_sr", "hybrid_bwa_lr", "hybrid_bwa_merged",  
+           "hybrid_mmi_sr", "hybrid_mmi_lr", "hybrid_mmi_merged", "megahit_metaT_sr", "hybrid_metaT_sr")
+write.csv(map_long, "merged_mappability_long_format.csv", quote = FALSE)
+
+### Plotting ###
+# setting a common theme for plotting
+theme_set(theme_bw() +
+            theme(strip.background = element_rect(colour = "black", fill = "white"), strip.text = element_text(size=17, family="Helvetica")) +
+            theme(axis.text.x = element_text(angle = 90, size = 18, family = "Helvetica"),
+                  axis.text.y = element_text(size = 18, family = "Helvetica"),
+                  axis.title.y = element_text(size = 18, face = "bold", family = "Helvetica"),
+                  axis.title.x=element_text(size = 18, face = "bold", family = "Helvetica"),
+                  axis.ticks.x=element_blank()) +
+            theme(plot.title = element_text(hjust = 0.5)) +
+            theme(legend.text = element_text(size = 18, family = "Helvetica"),
+                  legend.title = element_text(size = 18, family = "Helvetica", face = "bold"), legend.direction = "vertical", legend.box = "vertical")) 
+
+# Bubble plot
+map_long %>% 
+  mutate(samples = factor(samples, levels = samps)) %>% 
+  ggplot(aes(x=samples, y=cutoff, size = as.numeric(count), color = cutoff)) +
+  geom_point(alpha=0.7) +
+  scale_size(range = c(.1, 15), name="percent_mapped") +
+  facet_grid(.~factor(samples, levels = c("flye", "megahit", "hybrid_bwa_sr", "hybrid_bwa_lr", "hybrid_bwa_merged", "hybrid_mmi_sr", "hybrid_mmi_lr", "hybrid_mmi_merged", "megahit_metaT_sr", "hybrid_metaT_sr")), scales = "free_x", space = "free_x") +
+  guides(color = FALSE)
+
+# without facet
+map_long %>% 
+  mutate(samples = factor(samples, levels = c("flye", "megahit", "hybrid_bwa_sr", "hybrid_bwa_lr", "hybrid_bwa_merged", "hybrid_mmi_sr", "hybrid_mmi_lr", "hybrid_mmi_merged", "megahit_metaT_sr", "hybrid_metaT_sr"))) %>% 
+  ggplot(aes(x=samples, y=cutoff, size = as.numeric(count), color = cutoff)) +
+  geom_point(alpha=0.7) +
+  scale_size(range = c(.1, 15), name="percent_mapped") +
+  guides(color = FALSE)
+
+# heatmap
+ggplot(data = map_long, mapping = aes(x = samples,
+                                                  y = cutoff,
+                                                  fill = as.numeric(count))) +
+  geom_tile() +
+  xlab(label = "Sample") + 
+  scale_fill_gradient(name = "percent_mapped",
+                      low = "#FFFFFF",
+                      high = "#012345")
+
+# Color Brewer palette
+library(viridis)
+ggplot(map_long, aes(x = samples,
+                                 y = cutoff,
+                                 fill = as.numeric(count))) + 
+  geom_tile() +
+  scale_fill_viridis(discrete=FALSE) +
+  theme_ipsum()
+
+###############
+# Tables in R #
+###############
+# Trying tables, from here: https://www.littlemissdata.com/blog/prettytables
+library(data.table)
+library(dplyr)
+# install.packages("formattable")
+library(formattable)
+library(tidyr)
+
+#Set a few color variables to make our table more visually appealing
+customGreen0 = "#DeF7E9"
+customGreen = "#71CA97"
+customRed = "#ff7f7f"
+
+# working with our "real data"
+i1 <- merged_mappability %>% arrange(factor(samples, levels = c("flye", "megahit", "hybrid_bwa_sr", "hybrid_bwa_lr", "hybrid_bwa_merged", "hybrid_mmi_sr", "hybrid_mmi_lr", "hybrid_mmi_merged", "megahit_metaT_sr", "hybrid_metaT_sr")))
+
+formattable(i1)
+#1)  First Data Table
+formattable(i1, 
+            align =c("l","c","c","c","c"), 
+            list(`samples` = formatter(
+              "span", style = ~ style(color = "darkblue",font.weight = "bold")) 
+            ))
+
+#2) Add the color mapping for all 2011 to 2016.
+formattable(i1, 
+            align =c("l","c","c","c","c"), 
+            list(`samples` = formatter(
+              "span", style = ~ style(color = "darkblue",font.weight = "bold")), 
+  `bp1000`= color_tile(customGreen, customGreen0),
+  `bp1500`= color_tile(customGreen, customGreen0),
+  `bp2000`= color_tile(customGreen, customGreen0),
+  `bp5000`= color_tile(customGreen, customGreen0)
+))
+
+#4) Add custom formatter to improvement over time
+percent_formatter <- 
+  formatter("span", 
+            style = x ~ style(
+              font.weight = "bold", 
+              color = ifelse(x >= 90, customGreen, ifelse(x < 5, customRed, ifelse(x <= 79,"grey", "blue")))))
+
+formattable(i1, 
+            align =c("l","c","c","c","c"), 
+            list(`samples` = formatter(
+              "span", style = ~ style(color = "darkblue",font.weight = "bold")), 
+              `bp1000`= percent_formatter,
+              `bp1500`= percent_formatter,
+              `bp2000`= percent_formatter,
+              `bp5000`= percent_formatter
+            ))
+
+#####
+# Reactable format table
+# https://glin.github.io/reactable/index.html
+#install.packages("reactable")
+library(reactable)
+reactable(merged_mappability)
diff --git a/figures/src_SBB/mmseq_upset_plot_w_metaspades.R b/figures/src_SBB/mmseq_upset_plot_w_metaspades.R
new file mode 100644
index 0000000000000000000000000000000000000000..cbdbe2c9871bd122dc73bc8551e213d7edad985b
--- /dev/null
+++ b/figures/src_SBB/mmseq_upset_plot_w_metaspades.R
@@ -0,0 +1,130 @@
+library(cowplot)
+library(tidyverse)
+setwd("~/Documents/R_data/nanopore_ONT/")
+overlap_sizes <- read.table("~/Documents/R_data/nanopore_ONT/overlap_sizes.txt")
+Total <- read.table("~/Documents/R_data/nanopore_ONT/total.txt")
+
+sets <- c("flye", "megahit", "metaspades", "metaspades_hybrid")
+category <- c(
+  "flye",
+  "megahit",
+  "metaspades",
+  "metaspades_hybrid",
+  "flye_megahit",
+  "flye_metaspades",
+  "flye_metaspades_hybrid",
+  "megahit_metaspades",
+  "megahit_metaspades_hybrid",
+  "metaspades_metaspades_hybrid",
+  "flye_megahit_metaspades_metaspades_hybrid"
+)
+
+Overlap <- expand.grid(  #generate all the combinations
+  "sets" = sets,
+  "category" = category
+) %>%
+  as.data.frame()  %>%    #determine the intersection: a character col of Y or N.
+  mutate(intersect = case_when(
+    str_detect(category, sets %>% as.character()) ~ "Y",
+    T ~ "N"
+  ))
+
+Overlap[15,3] = "N" 
+Overlap[27,3] = "N"
+Overlap[35,3] = "N"
+
+# Upper-left plot
+upperleft <- Total %>%
+  ggplot(aes(x = V1, y= V2)) +
+  geom_hline(yintercept = -Inf, size = 1.5) +
+  geom_vline(xintercept = -Inf, size = 1.5) +
+  geom_bar(stat = "identity", aes(fill = V1), alpha = 0.8) +
+  geom_text(aes(label = as.character(V2)), size = 5, angle = 90, hjust = 0, y = 1, fontface = "bold") +
+  scale_fill_manual(values = c("orangered3", "seagreen", "grey", "dodgerblue2"), #this is my own custom palette
+                    limits = c("flye", "megahit", "metaspades", "metaspades_hybrid")) +              #feel free to use something else
+  scale_x_discrete(labels = NULL) +
+  scale_y_continuous(labels = NULL) +
+  labs(x = NULL,
+       y = "set size") +
+  theme_minimal() +
+  theme(legend.position = "none") +
+  theme(text = element_text(size= 16, face="bold")) +
+  theme(axis.text.x=element_text(colour = "black", angle = 45, hjust = 1)) +
+  theme(axis.text.y=element_text(colour = "black")) +
+  theme(panel.grid = element_blank())
+upperleft
+
+# Lower-left plot
+Overlap$category <- factor(Overlap$category)
+lowerleft <- Overlap %>%
+  mutate(category = factor(rev(Overlap$category))) %>% #in the colored matrix the first y value appears in the bottom, so the order need to be reversed
+  ggplot(aes(x = sets, y = category))+
+  geom_tile(aes(fill = sets, alpha = intersect), color = "black", size = 1.5) +
+  scale_fill_manual(values = c("orangered1", "seagreen", "grey", "dodgerblue2"),
+                    limits = c("flye", "megahit", "metaspades", "metaspades_hybrid")) +
+  scale_alpha_manual(values = c(0.8, 0),  #color the grid for Y, don't color for N.
+                     limits = c("Y", "N")) +
+  scale_y_discrete(labels = NULL) +
+  scale_x_discrete(labels = rep(" ", length(Overlap$sets))) + #I left white space here for better alignment w/ extended plots
+  labs(x = " ", #white space for better alignment w/ right side plots
+       y = "overlap") +
+  theme_minimal() +
+  theme(legend.position = "none") +
+  theme(text = element_text(size= 16, face="bold")) +
+  theme(axis.text.x=element_text(colour = "black")) +
+  theme(axis.text.y=element_text(colour = "black")) +
+  theme(panel.grid = element_blank())
+lowerleft
+
+# Upper-right plot
+Assembly_method <- c("flye", "megahit", "metaspades", "metaspades_hybrid")
+upperright <- get_legend(
+  Total %>%
+    ggplot(aes(x = V1, y= V2)) +
+    geom_hline(yintercept = -Inf, size = 1.5) +
+    geom_vline(xintercept = -Inf, size = 1.5) +
+    geom_bar(stat = "identity", aes(fill = Assembly_method), alpha = 0.9) +
+    geom_text(aes(label = "Assembly method"), size = 40, angle = 90, hjust = 0, y = 1, fontface = "bold") +
+    scale_fill_manual(values = c("orangered1", "seagreen", "grey", "dodgerblue2"), #this is my own custom palette
+                      limits = c("flye", "megahit", "metaspades", "metaspades_hybrid")) +              #feel free to use something else
+    scale_x_discrete(labels = NULL) +
+    scale_y_continuous(labels = NULL) +
+    labs(x = NULL,
+         y = "set size") +
+    theme_minimal() +
+    theme(legend.position = "right") +
+    theme(text = element_text(size= 16, face="bold")) +
+    theme(axis.text.x=element_text(colour = "black", angle = 45, hjust = 1)) +
+    theme(axis.text.y=element_text(colour = "black")) +
+    theme(panel.grid = element_blank())
+)
+plot_grid(upperright)
+
+# Lower-right plot
+overlap_sizes <- overlap_sizes %>% mutate(V2 = factor(V2, levels = rev(V2))) #the order needs to be reversed for the figure to match
+lowerright <- overlap_sizes %>%
+  ggplot(aes(x = V2, y = V1)) +
+  geom_hline(yintercept = -Inf, size = 1.5) +
+  geom_vline(xintercept = -Inf, size = 1.5) +
+  geom_bar(stat = "identity", fill = "grey80", color = NA, alpha = 0.8) +
+  geom_text(aes(label = V1, y = 0), size = 5, hjust = 0, vjust = 0.5, fontface = "bold") +
+  scale_y_continuous(breaks = c(0, max(overlap_sizes$V1)) ,
+                     labels = rep(" ", 2)) + #I left white space here for better alignment w/ extended plots
+  scale_x_discrete(labels = NULL) +
+  labs(y = "intersect sizes",
+       x = NULL) +
+  theme_minimal() +
+  theme(text = element_text(size= 15, face="bold")) +
+  theme(axis.text.x=element_text(colour = "black", angle = 45, hjust = 1)) +
+  theme(axis.text.y=element_text(colour = "black")) +
+  theme(panel.grid = element_blank()) +
+  coord_flip()
+lowerright
+
+pdf(file="ONT_protein_assembler_mmseq2_Upset_plot_w_metaspades.pdf")
+plot_grid(upperleft, upperright, lowerleft, lowerright,
+          nrow = 2,
+          ncol = 2,
+          rel_heights = c(0.95, 1.5), #the more rows in the lower part, the longer it should be
+          rel_widths = c(0.95, 0.95))
+dev.off()