celestialphineas · March 19, 2019 18:04
diff --git a/dnf.py b/dnf.py
 #!/usr/bin/python

 import sys
 from Bio import SeqIO
 from itertools import groupby, product

 def canonical(x):
    rules = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    paired = ''.join(map(lambda x: rules[x], list(x[::-1])))
    return paired if paired < x else x

 def dnf(x):
    symbols = sorted(list(set(map(canonical, [''.join(i) for i in product('ACGT',repeat=2)]))))
    splitted = [canonical(x[i:i+2]) for i in range(0, len(x) - 1)]
    counts = [(i, len(list(c))) for i, c in groupby(sorted(splitted))]
    count_dict = dict((x, y) for x, y in counts)
    return map(lambda x: count_dict[x] if x in count_dict else 0, symbols)

 with open(sys.argv[2], 'w') as output:
    for seq_record in SeqIO.parse(sys.argv[1], "fasta"):
        result = reduce(lambda x, y: str(x) + '\t' + str(y), dnf(str(seq_record.seq))) + '\n'
        output.write(result)
	#!/usr/bin/python

	import sys
	from Bio import SeqIO
	from itertools import groupby, product

	def canonical(x):
	rules = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
	paired = ''.join(map(lambda x: rules[x], list(x[::-1])))
	return paired if paired < x else x

	def dnf(x):
	symbols = sorted(list(set(map(canonical, [''.join(i) for i in product('ACGT',repeat=2)]))))
	splitted = [canonical(x[i:i+2]) for i in range(0, len(x) - 1)]
	counts = [(i, len(list(c))) for i, c in groupby(sorted(splitted))]
	count_dict = dict((x, y) for x, y in counts)
	return map(lambda x: count_dict[x] if x in count_dict else 0, symbols)

	with open(sys.argv[2], 'w') as output:
	for seq_record in SeqIO.parse(sys.argv[1], "fasta"):
	result = reduce(lambda x, y: str(x) + '\t' + str(y), dnf(str(seq_record.seq))) + '\n'
	output.write(result)