-
-
Save andrewsmedina/1431867 to your computer and use it in GitHub Desktop.
Create slugs using Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run the test suite with coverage reporting for the slugfy module.
test:
	clear
	nosetests --with-coverage --cover-package slugfy test_slugfy.py

# Remove compiled bytecode and editor backup files under the current directory.
# The explicit "." start path is required by non-GNU find implementations.
clean:
	find . -name '*.pyc' -exec rm -f {} +
	find . -name '*~' -exec rm -f {} +

.PHONY: test clean
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#coding: utf-8 | |
from unicodedata import normalize | |
def slug(text, encoding=None):
    """Return a lowercase, ASCII-only, dash-separated slug for *text*.

    Args:
        text: The text to slugify. Byte strings are decoded first; text
            (unicode) strings are used as they are.
        encoding: Codec used to decode *text* when it is a byte string.
            Defaults to ``'ascii'`` when omitted or ``None``.

    Returns:
        The slug as a native ``str``: surrounding whitespace stripped,
        spaces replaced by dashes, runs of dashes collapsed to one,
        accents removed, remaining non-ASCII characters dropped, and
        everything lowercased.

    Raises:
        UnicodeDecodeError: If *text* is a byte string that cannot be
            decoded with the chosen encoding.
    """
    # ``bytes`` is an alias of ``str`` on Python 2, so this single check
    # selects "needs decoding" correctly on both Python 2 and Python 3.
    if isinstance(text, bytes):
        text = text.decode(encoding or 'ascii')

    clean_text = text.strip().replace(' ', '-')
    # Repeated spaces become repeated dashes; collapse them (this also
    # collapses dash runs that were already present in the input).
    while '--' in clean_text:
        clean_text = clean_text.replace('--', '-')

    # NFKD splits accented characters into base letter + combining mark;
    # encoding to ASCII with 'ignore' then drops the marks.
    ascii_text = normalize('NFKD', clean_text).encode('ascii', 'ignore').lower()

    # On Python 3 the encode step yields bytes; hand back a native string
    # so callers can compare against ordinary string literals.
    if not isinstance(ascii_text, str):
        ascii_text = ascii_text.decode('ascii')
    return ascii_text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
import unittest | |
from slugfy import slug | |
class TestSlug(unittest.TestCase):
    """Behavioural tests for ``slug``.

    Non-ASCII inputs are written as ``u''`` literals rather than byte
    literals decoded at runtime, so the same source runs on Python 2
    (given the file's utf-8 coding declaration) and on Python 3.
    """

    def test_should_always_return_lowercase_words(self):
        self.assertEqual(slug('ALVAROJUSTEN'), 'alvarojusten')

    def test_should_replace_space_with_dash(self):
        self.assertEqual(slug('Alvaro Justen'), 'alvaro-justen')

    def test_should_ignore_unecessary_spaces(self):
        # Leading, trailing and repeated inner spaces must all collapse.
        self.assertEqual(slug('  alvaro   justen  '), 'alvaro-justen')

    def test_should_replace_nonascii_chars_with_corresponding_ascii_chars(self):
        self.assertEqual(slug(u'áÁàÀãÃâÂäÄ'), 'aaaaaaaaaa')
        self.assertEqual(slug(u'éÉèÈẽẼêÊëË'), 'eeeeeeeeee')
        self.assertEqual(slug(u'íÍìÌĩĨîÎïÏ'), 'iiiiiiiiii')
        self.assertEqual(slug(u'óÓòÒõÕôÔöÖ'), 'oooooooooo')
        self.assertEqual(slug(u'úÚùÙũŨûÛüÜ'), 'uuuuuuuuuu')
        self.assertEqual(slug(u'ćĆĉĈçÇ'), 'cccccc')

    def test_should_accept_unicode_text(self):
        self.assertEqual(slug(u'Álvaro Justen'), 'alvaro-justen')

    def test_should_accept_other_input_encodings(self):
        # Byte input in a non-default codec, decoded via the second argument.
        slugged_text = slug(u'Álvaro Justen'.encode('utf16'), 'utf16')
        self.assertEqual(slugged_text, 'alvaro-justen')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment