Skip to content
Snippets Groups Projects
Commit a12b69cc authored by Valentina Galata
Browse files

crispr data/fig: filter casc results (issue #19)

parent 25b450c3
No related branches found
No related tags found
2 merge requests: !71 "Master", !70 "Add latest figure updates"
......@@ -164,7 +164,7 @@ rule fig_crispr_data:
import os
import re
import pandas
from src.utils import parse_minced_report
from src.utils import parse_minced_report, parse_casc_report
summary = []
for ifile_path in input:
ifile_df = None
......@@ -177,11 +177,9 @@ rule fig_crispr_data:
asm_tool = os.path.basename(ifile_path).split(".")[0]
# file content
if crispr_tool == "minced":
ifile_df = parse_minced_report(ifile_path)
ifile_df = pandas.DataFrame(ifile_df)
ifile_df = pandas.DataFrame(parse_minced_report(ifile_path))
elif crispr_tool == "casc":
ifile_df = pandas.read_csv(ifile_path, sep="\t", header=0, usecols=["#seq_id", "spacers", "array_start", "array_stop"], comment=None)
ifile_df.rename(columns={"#seq_id": "seq_id"}, inplace=True)
ifile_df = parse_casc_report(ifile_path)
ifile_df = ifile_df.assign(crispr_tool=crispr_tool)
ifile_df = ifile_df.assign(asm_tool=asm_tool)
summary.append(ifile_df)
......
#!/usr/bin/python
def parse_casc_report(ifile_path):
    """
    Parse a casc report and keep only the entries flagged as bona fide.

    The report is read as a tab-separated table with a header line. Only the
    columns "#seq_id", "spacers", "array_start", "array_stop" and "bonafide"
    are loaded; "#seq_id" is renamed to "seq_id". Rows whose "bonafide" value
    is False are removed and the "bonafide" column itself is dropped from the
    result. The row index of the retained rows is left as read from the file.

    :param ifile_path: path to the tab-separated casc report
    :return: pandas.DataFrame with the columns "seq_id", "spacers",
        "array_start" and "array_stop"; empty if the report has no rows
    :raises ValueError: if the report has rows but its "bonafide" column was
        not parsed as boolean
    """
    import pandas

    # read in
    summary = pandas.read_csv(
        ifile_path,
        sep="\t",
        header=0,
        usecols=["#seq_id", "spacers", "array_start", "array_stop", "bonafide"],
        comment=None,
    )
    # rename columns
    summary = summary.rename(columns={"#seq_id": "seq_id"})
    # filter
    # A report without data rows yields an empty, non-boolean column; there is
    # nothing to filter in that case, so the mask is only applied to real rows.
    if not summary.empty:
        bonafide = summary["bonafide"]
        # Indexing a frame with a non-boolean Series is interpreted as column
        # selection, so make sure the flag really is boolean before masking.
        if not pandas.api.types.is_bool_dtype(bonafide):
            raise ValueError(
                "Column 'bonafide' in {} is not boolean (dtype: {})".format(
                    ifile_path, bonafide.dtype
                )
            )
        summary = summary.loc[bonafide]
    # drop columns (returns a new frame instead of mutating a filtered slice)
    return summary.drop(columns=["bonafide"])
def parse_minced_report(ifile_path):
"""
Parse the MINCED report: for each sequence and found CRISPR array, extract
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment