Skip to content

Instantly share code, notes, and snippets.

@bencharb
Last active December 4, 2015 15:13
Show Gist options
  • Select an option

  • Save bencharb/792570cd2d3dc379dd9e to your computer and use it in GitHub Desktop.

Select an option

Save bencharb/792570cd2d3dc379dd9e to your computer and use it in GitHub Desktop.
pull_extensions
"""
Pop extensions off a filename, either by name or by the number of extensions to drop from the end.
pull_extensions('filenamewith.ext.csv.gz', 2) # 'filenamewith.ext'
pull_extensions('filenamewith.ext.csv.gz', 'csv.gz') # 'filenamewith.ext'
pull_extensions('filenamewith.ext.csv.gz', 'csv') # 'filenamewith.ext.gz'
"""
import itertools
def remove_from_end(list_, val):
    """
    Remove the last occurrence of ``val`` from ``list_`` in place.

    @param list_: list to modify; it is mutated, not copied
    @param val: value whose right-most occurrence is removed
    @return: the same ``list_`` object that was passed in
    @raise ValueError: if ``val`` does not occur in ``list_``
    """
    # Walk backwards so the first hit is the right-most occurrence; this
    # avoids building a reversed copy of the list just to find one index.
    for index in range(len(list_) - 1, -1, -1):
        if list_[index] == val:
            del list_[index]
            return list_
    raise ValueError('%r is not in list' % (val,))
def pull_extension_values(filename, *exts):
    """
    Remove named extensions from ``filename``.

    @param filename: name whose dot-separated parts are examined
    @param exts: extensions to remove; each argument may be a single
        extension ('csv'), a dotted run ('csv.gz', '.csv'), or a list/tuple
        of those.  None entries are ignored.
    @return: ``filename`` with the last occurrence of each named extension
        removed.  Names that match no part of ``filename`` are skipped.
        When at least as many extensions are named as ``filename`` has
        dot-separated parts, '' is returned.
    """
    wanted = []
    for ext in exts:
        # Accept list/tuple arguments as well as plain strings.
        group = ext if isinstance(ext, (list, tuple)) else (ext,)
        for item in group:
            if item is None:
                continue
            # Discard empty fragments so '.csv' means the same as 'csv'.
            wanted.extend(piece for piece in item.split('.') if piece)

    parts = filename.split('.')
    if len(wanted) >= len(parts):
        return ''

    # Names are handled right-to-left, and each one is matched against whole
    # parts (not as a substring of the filename), scanning from the end so
    # the last occurrence is the one removed.  A name with no matching part
    # simply removes nothing.
    for ext in reversed(wanted):
        for index in range(len(parts) - 1, -1, -1):
            if parts[index] == ext:
                del parts[index]
                break
    return '.'.join(parts)
def pull_extensions_from_end(filename, number_of_extentions=None):
    """
    Drop a number of trailing extensions from ``filename``.

    @param filename: name whose dot-separated parts are examined
    @param number_of_extentions: how many trailing parts to drop; None
        drops every part after the first
    @return: the shortened name; ``filename`` unchanged when the count is
        zero or negative; '' when the count is at least the number of parts
    """
    pieces = filename.split('.')
    count = number_of_extentions
    if count is None:
        count = len(pieces) - 1
    if count <= 0:
        return filename
    if count >= len(pieces):
        return ''
    # 0 < count < len(pieces) here, so the slice always keeps the first piece.
    return '.'.join(pieces[:-count])
def pull_extensions(filename, *exts):
    """
    Strip extensions from ``filename`` by name or by count.

    @param filename: name to strip extensions from
    @param exts: one of
        (1) extensions to pull, as separate arguments or as a list/tuple,
            like ('csv', 'gz',)
        (2) string of extensions to pull, like 'csv.gz'
        (3) integer of number of extensions to pull from end
        With no argument (or None) the call is forwarded to
        pull_extensions_from_end with a count of None.
    @return: whatever pull_extension_values (cases 1 and 2) or
        pull_extensions_from_end (case 3) returns
    @raise AssertionError: if a count is combined with further arguments
    @raise TypeError: if the first argument is neither a name nor a count

    Examples:
    pull_extensions('filenamewith.ext.csv.gz', 2) # 'filenamewith.ext'
    pull_extensions('filenamewith.ext.csv.gz', 'csv.gz') # 'filenamewith.ext'
    pull_extensions('filenamewith.ext.csv.gz', 'csv') # 'filenamewith.ext.gz'
    """
    if not exts:
        exts = (None,)
    first = exts[0]

    # str plus the type of a unicode literal covers what basestring matched
    # on Python 2 and still resolves on Python 3, where basestring is gone.
    string_types = (str, type(u''))

    if isinstance(first, string_types + (tuple, list)):
        # Unpack list/tuple arguments so the helper is handed individual
        # extension arguments rather than containers.
        names = []
        for ext in exts:
            if isinstance(ext, (tuple, list)):
                names.extend(ext)
            else:
                names.append(ext)
        return pull_extension_values(filename, *names)

    if isinstance(first, (int, type(None))):
        if len(exts) != 1:
            # Raised explicitly: a bare assert disappears under ``python -O``.
            raise AssertionError(
                'Must pass only one integer for number of extensions')
        return pull_extensions_from_end(filename, first)

    # Wrap exts in a 1-tuple: formatting with the bare tuple would spread
    # its items over the single %s placeholder.
    raise TypeError('Failed to evaluate exts argument %s' % (exts,))
def test_pull_extensions():
    """Check pull_extensions for name-based and count-based removal."""
    sample = 'filenamewith.ext.csv.gz'
    # (exts argument, expected result), checked in this order.
    cases = (
        ('csv.gz', 'filenamewith.ext'),
        ('csv', 'filenamewith.ext.gz'),
        ('filenamewith.ext.csv.gz', ''),
        (2, 'filenamewith.ext'),
        (1, 'filenamewith.ext.csv'),
        (4, ''),
    )
    for ext, expected in cases:
        assert pull_extensions(sample, ext) == expected
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment