diff --git a/workflow_zymo/Snakefile b/workflow_zymo/Snakefile index ce0507e84e17feb8974f89c3cfe63d02280b0f63..abffb2a29be1c653635dc728a2c995d763cafcb3 100644 --- a/workflow_zymo/Snakefile +++ b/workflow_zymo/Snakefile @@ -238,12 +238,13 @@ rule comparison: for fname in input.cdhit: fname_col=os.path.basename(fname).split("__")[0] # column name to be used fname_pids=[] - with open(input.cdhit,'r') as fi: + with open(fname,'r') as fi: for ln in fi: if ln.startswith('>'): - fname_pids.append(re.sub("^>", "", ln.strip().split(" ")[0])) - + nam=re.sub("^>", "", ln.strip().split(" ")[0]) + fname_pids.append(nam.strip().split("__")[2]) + df.loc[fname_pids,fname_col] = True # save - df.to_csv(output[0], sep='\t', header=0) \ No newline at end of file + df.to_csv(output[0], sep='\t', header=0)