Skip to content

Commit cea3f8e

Browse files
committed
Merge pull request #12 from bioinform/tolerate-missing-ins
Tolerate missing insertion sequences
2 parents 88575cb + 8cf3cdb commit cea3f8e

File tree

3 files changed

+41
-7
lines changed

3 files changed

+41
-7
lines changed

breakseq2/biopy/io/SV.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python
22

3-
import sys, os
3+
import sys, os, logging
44
import Fasta
55
import GFF
66

@@ -124,8 +124,13 @@ def get_flanks(self):
124124

125125

126126
def parse(gff_file, base=None):
127+
logger = logging.getLogger(parse.__name__)
128+
127129
ins_file=gff_file.replace(".gff","")+".ins"
128130
insertions=None if not os.path.exists(ins_file) else Fasta.parse(ins_file, todict=True)
131+
if insertions is None:
132+
logger.warn("Insertion sequence file %s missing" % ins_file)
133+
129134
calls=[]
130135
for entry in open(gff_file, "r"):
131136
if entry.startswith("#"): continue
@@ -137,7 +142,7 @@ def parse(gff_file, base=None):
137142
del call.attributes["Iseq"]
138143
calls.append(call)
139144
except:
140-
print >> sys.stderr, "Unable to parse line: %s" % entry
145+
logger.error("Unable to parse line: %s" % entry)
141146
raise
142147
return calls
143148

breakseq2/breakseq_index.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import argparse
22
import sys
33
import os
4+
import logging
45
from biopy.io import Fasta
56
from biopy.io import SV
67

@@ -21,15 +22,28 @@ def get_seq(sv, jn_type, seq, format_version):
2122
return ">%s:%s:%s\n%s" % (sv.id, sv.size(), jn_type, seq)
2223

2324

24-
def generate_bplib(gff, reference, output, junction_length=DEFAULT_JUNCTION_LENGTH, format_version=DEFAULT_FORMAT_VERSION):
25+
def generate_bplib(gff, reference, output, junction_length=DEFAULT_JUNCTION_LENGTH, format_version=DEFAULT_FORMAT_VERSION, chromosomes=[]):
26+
logger = logging.getLogger(generate_bplib.__name__)
27+
2528
if not gff or not os.path.isfile(gff):
29+
logger.error("GFF file unspecified or missing")
2630
raise Exception("GFF file unspecified or missing")
2731

2832
outfd = open(output, "w") if output else sys.stdout
29-
insertion_sequence_file = gff.replace(".gff", "") + ".ins";
30-
if not os.path.isfile(insertion_sequence_file):
31-
raise Exception("Insertion sequence file %s missing" % insertion_sequence_file)
33+
34+
ins_file = gff.replace(".gff", "") + ".ins"
35+
ins_file_absent = not os.path.isfile(ins_file)
36+
if ins_file_absent:
37+
logger.error("Insertion sequence file %s not found. Insertions will be skipped" % ins_file)
38+
3239
for sv in SV.parse(gff, Fasta.Seqs(reference, junction_length)):
40+
if chromosomes and sv.name not in chromosomes:
41+
continue
42+
43+
if sv.is_insertion() and ins_file_absent:
44+
logger.warn("Omitting entry %s due to missing insertion sequence file" % sv.id)
45+
continue
46+
3347
flanks = sv.get_flanks()
3448
if sv.is_insertion():
3549
if flanks[0] is None or flanks[1] is None:

scripts/breakseq2_gen_bplib.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#!/usr/bin/env python
22

3+
import sys
34
import argparse
5+
import logging
46
from breakseq2 import breakseq_index, _version
57

68
if __name__ == "__main__":
@@ -9,7 +11,20 @@
911
breakseq_index.add_options(parser)
1012
parser.add_argument("--reference", help="Reference FASTA", required=True)
1113
parser.add_argument("--output", help="Output FASTA to generate. Leave unspecified for stdout")
14+
parser.add_argument("--chromosomes", nargs="+", help="List of chromosomes to process", default=[])
1215
parser.add_argument('--version', action='version', version='%(prog)s ' + _version.__version__)
16+
parser.add_argument('--log', help="Log level", default="INFO")
17+
1318
args = parser.parse_args()
1419

15-
breakseq_index.generate_bplib(args.bplib_gff, args.reference, args.output, args.junction_length, args.format_version)
20+
loglevel = getattr(logging, args.log.upper(), None)
21+
if not isinstance(loglevel, int):
22+
raise ValueError('Invalid log level: %s' % args.log)
23+
24+
FORMAT = '%(levelname)s %(asctime)-15s %(name)-20s %(message)s'
25+
logging.basicConfig(level=loglevel, format=FORMAT)
26+
27+
logger = logging.getLogger(__file__)
28+
logger.info("Command-line: " + " ".join(sys.argv))
29+
30+
breakseq_index.generate_bplib(args.bplib_gff, args.reference, args.output, args.junction_length, args.format_version, args.chromosomes)

0 commit comments

Comments
 (0)