Skip to content

Instantly share code, notes, and snippets.

@rshk
Created March 14, 2014 17:21
Show Gist options
Save rshk/9552471 to your computer and use it in GitHub Desktop.
import io
import re
import pytest
# Parametrization table: (input string, expected list of fields).
# Inputs are raw strings, so every backslash shown is a literal backslash.
test_cases = [
    # Plain colon-separated fields.
    (r'eggs:spam:bacon', ['eggs', 'spam', 'bacon']),

    # Backslash escapes: an escaped colon stays inside the field, and an
    # escaped backslash collapses to a single literal backslash.
    (r'eggs\:spam:bacon', ['eggs:spam', 'bacon']),
    (r'eggs\\:spam:bacon', ['eggs\\', 'spam', 'bacon']),
    (r'eggs\\\:spam:bacon', ['eggs\\:spam', 'bacon']),

    # Empty fields from doubled, leading and trailing separators.
    (r'eggs::spam:bacon', ['eggs', '', 'spam', 'bacon']),
    (r':eggs:spam:bacon', ['', 'eggs', 'spam', 'bacon']),
    (r'eggs:spam:bacon:', ['eggs', 'spam', 'bacon', '']),
    (r'::eggs:spam:bacon', ['', '', 'eggs', 'spam', 'bacon']),
    (r'eggs:spam:bacon::', ['eggs', 'spam', 'bacon', '', '']),
]
def split_string(s):
    r"""Split the text string *s* on unescaped colons, yielding each field.

    A backslash escapes the character that follows it: the pair is replaced
    by that character alone, so ``\:`` contributes a literal colon to the
    current field instead of ending it, and ``\\`` contributes a single
    backslash. A lone backslash at the very end of *s* has nothing to escape
    and is kept as a literal backslash.

    This is a generator. It always yields one more field than there are
    unescaped colons, so an empty *s* yields one empty string, and leading,
    trailing or doubled colons produce empty fields.
    """
    field = []
    # Each token is either a backslash together with the character it
    # escapes, or any other single character (an unescaped colon included).
    # re.DOTALL makes "." match newlines too; without it they would be
    # silently dropped from the output.
    for token in re.findall(r'\\.|.', s, re.DOTALL):
        if token == ':':
            yield ''.join(field)
            field = []
        else:
            # For an escaped pair keep only the escaped character; for a
            # single-character token this is the character itself.
            field.append(token[-1])
    yield ''.join(field)
@pytest.mark.parametrize('instring,output', test_cases)
def test_string_tokenization(instring, output):
    """Check that splitting ``instring`` produces exactly the fields in ``output``."""
    fields = list(split_string(instring))
    assert fields == output
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment