Created
April 17, 2015 11:41
-
-
Save davidshepherd7/2857bfc620a648a90e7f to your computer and use it in GitHub Desktop.
python string split iterator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
import sys | |
import re | |
def isplit(string, delimiter=None):
    """Lazily split *string* into fields, yielding them one at a time.

    Each field is a maximal run of non-delimiter characters, so empty
    fields are never produced: leading, trailing and consecutive
    delimiters are skipped (unlike str.split with an explicit separator).

    Args:
        string: The text to split.
        delimiter: A single character to split on. When None (the
            default), any whitespace character acts as a delimiter.

    Returns:
        A generator yielding the fields in order of appearance.

    Raises:
        ValueError: If *delimiter* is given but is not exactly one
            character long. Raised at call time, not on first iteration.
    """
    if delimiter is not None and len(delimiter) != 1:
        raise ValueError("Can only handle single character delimiters", delimiter)

    # The delimiter ends up inside a regex character class, so it must be
    # escaped in case it is "\", "]", "^" or similar.
    separator_class = r"\s" if delimiter is None else re.escape(delimiter)
    field_pattern = re.compile(r"[^{}]+".format(separator_class))
    return (match.group(0) for match in field_pattern.finditer(string))
def main():
    """Run the self-checks for isplit and return 0 when they all pass.

    Every check is a bare assert, so a failing case surfaces as an
    AssertionError rather than a non-zero return value.
    """
    # Each entry pairs the positional arguments passed to isplit with the
    # list its iterator is expected to produce.
    cases = [
        # Explicit single-character delimiters
        (("1,2,3", ","), ["1", "2", "3"]),
        (("1;2;3,", ";"), ["1", "2", "3,"]),
        (("1;2 ;3, ", ";"), ["1", "2 ", "3, "]),
        # Default delimiter: any whitespace character
        (("1 2 3",), ["1", "2", "3"]),
        (("1\t2\t3",), ["1", "2", "3"]),
        (("1\t2 \t3",), ["1", "2", "3"]),
        (("1\n2\n3",), ["1", "2", "3"]),
        # Leading and trailing whitespace yields no empty fields
        ((" 1 2 3 ",), ["1", "2", "3"]),
        # Delimiters that are special characters in a regex
        ((r"1\2\3", "\\"), ["1", "2", "3"]),
        ((r"1*2*3", "*"), ["1", "2", "3"]),
    ]
    for call_args, expected in cases:
        # Materialise the lazy iterator so it can be compared to a list.
        assert list(isplit(*call_args)) == expected

    # A delimiter longer than one character must be rejected.
    try:
        list(isplit(r"1,.2,.3", ",."))
    except ValueError:
        pass
    else:
        assert False

    return 0
# When run as a script, execute the self-checks and use main()'s return
# value as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment