Skip to content

Instantly share code, notes, and snippets.

@moonwatcher
Last active August 29, 2015 14:20
Show Gist options
  • Save moonwatcher/ce9946e7a59c720fd4bf to your computer and use it in GitHub Desktop.
Save moonwatcher/ce9946e7a59c720fd4bf to your computer and use it in GitHub Desktop.
Extract a sequence from a FASTA file by coordinates
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Extract a sequence by coordinates from a FASTA file
# Author: Lior Galanti < [email protected] >
# NYU Center for Genomics and Systems Biology 2015
#
# crop is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License as published by the Free Software Foundation;
# either version 2 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with this program.
# If not, see <http://www.gnu.org/licenses/>.
import sys
import io
from io import StringIO
def usage():
    """Print the expected command-line arguments to standard output."""
    message = 'usage: path int:start int:end'
    print(message)
def crop(path, start, end):
    """Print the slice [start, end) of the sequence stored in a FASTA file.

    Header lines (those beginning with '>') and blank lines are skipped.
    Every other line is stripped of surrounding whitespace and appended to
    one in-memory sequence, so ``start`` and ``end`` are zero-based offsets
    into that concatenation. Reading stops once more than ``end`` characters
    have been buffered, so the rest of the file is never loaded.

    Args:
        path: Path of the FASTA file to read.
        start: Zero-based offset of the first character to print.
        end: Zero-based offset one past the last character to print.

    If the file cannot be opened or read, the OS error message followed by
    ``path`` is printed instead of a sequence.
    """
    reference = StringIO()
    try:
        # The try wraps the open() call as well, so a missing or unreadable
        # file is reported rather than escaping as an uncaught exception.
        with io.open(path, 'r') as file:
            for line in file:
                # Enough sequence has been buffered to serve the request.
                if reference.tell() > end:
                    break
                line = line.strip()
                # Blank lines strip down to '' and must be skipped explicitly;
                # indexing line[0] on them would raise IndexError.
                if line and not line.startswith('>'):
                    reference.write(line)
    except OSError as e:
        print('{} {}'.format(e.strerror, path))
    else:
        # Rewind to the requested offset and emit exactly the requested span.
        reference.seek(start)
        print(reference.read(end - start))
# Command-line entry point: expects exactly three arguments after the
# script name -- the FASTA path, then the integer start and end offsets.
if len(sys.argv) != 4:
    usage()
else:
    path = sys.argv[1]
    try:
        start = int(sys.argv[2])
        end = int(sys.argv[3])
    except ValueError:
        # One of the coordinates was not a valid integer.
        usage()
    else:
        crop(path, start, end)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment