Skip to content

Instantly share code, notes, and snippets.

@rochacbruno
Forked from turicas/Makefile
Created December 5, 2011 00:39
Show Gist options
  • Save rochacbruno/1431805 to your computer and use it in GitHub Desktop.
Save rochacbruno/1431805 to your computer and use it in GitHub Desktop.
Create slugs using Python
# Run the test suite with coverage reporting restricted to the slugfy package.
test:
	clear
	nosetests --with-coverage --cover-package slugfy test_slugfy.py

# Remove compiled bytecode (*.pyc) and editor backup files (*~).
# The explicit "." starting point is required by non-GNU find implementations
# (e.g. BSD/macOS), which reject an invocation without a path operand.
clean:
	find . -regex '.*\.pyc' -exec rm {} \;
	find . -regex '.*~' -exec rm {} \;

.PHONY: test clean
#!/usr/bin/env python
#coding: utf-8
from unicodedata import normalize
def slug(text, encoding=None):
    """Return a lowercase, dash-separated, ASCII-only slug for *text*.

    Surrounding whitespace is stripped, each space becomes a dash, accented
    characters are reduced to their base letter through NFKD decomposition,
    and runs of dashes are collapsed into a single dash.

    Characters that have no ASCII decomposition (for example 'ß' or 'ł') are
    dropped rather than transliterated.

    Args:
        text: Text to convert. May be a text string or a byte string.
        encoding: Codec used to decode *text* when it is a byte string.
            Defaults to 'ascii' when omitted or falsy.

    Returns:
        The slug as the interpreter's native ``str`` type.

    Raises:
        UnicodeDecodeError: If a byte string cannot be decoded with
            *encoding*.
    """
    # `bytes` is an alias of `str` on Python 2, so this single check selects
    # byte strings on both Python 2 and Python 3.
    if isinstance(text, bytes):
        text = text.decode(encoding or 'ascii')

    dashed = text.strip().replace(' ', '-')

    # NFKD splits accented letters into base letter + combining mark; the
    # 'ignore' handler then discards everything outside ASCII.
    ascii_text = normalize('NFKD', dashed).encode('ascii', 'ignore')
    if not isinstance(ascii_text, str):
        # Python 3: encode() produced bytes; hand back text instead.
        ascii_text = ascii_text.decode('ascii')

    # Collapse only after the ASCII step: dropping a character that sat
    # between two dashes would otherwise leave a '--' in the result.
    while '--' in ascii_text:
        ascii_text = ascii_text.replace('--', '-')

    return ascii_text.lower()
#!/usr/bin/env python
# coding: utf-8
import unittest
from slugfy import slug
class TestSlug(unittest.TestCase):
    """Behavioural tests for ``slug``."""

    def test_should_always_return_lowercase_words(self):
        self.assertEqual(slug('ALVAROJUSTEN'), 'alvarojusten')

    def test_should_replace_space_with_dash(self):
        self.assertEqual(slug('Alvaro Justen'), 'alvaro-justen')

    def test_should_ignore_unecessary_spaces(self):
        self.assertEqual(slug(' alvaro justen '), 'alvaro-justen')

    def test_should_replace_nonascii_chars_with_corresponding_ascii_chars(self):
        # Unicode literals are used directly: calling .decode() on a plain
        # string literal only works on Python 2.
        self.assertEqual(slug(u'áÁàÀãÃâÂäÄ'), 'aaaaaaaaaa')
        self.assertEqual(slug(u'éÉèÈẽẼêÊëË'), 'eeeeeeeeee')
        self.assertEqual(slug(u'íÍìÌĩĨîÎïÏ'), 'iiiiiiiiii')
        self.assertEqual(slug(u'óÓòÒõÕôÔöÖ'), 'oooooooooo')
        self.assertEqual(slug(u'úÚùÙũŨûÛüÜ'), 'uuuuuuuuuu')
        self.assertEqual(slug(u'ćĆĉĈçÇ'), 'cccccc')

    def test_should_accept_unicode_text(self):
        self.assertEqual(slug(u'Álvaro Justen'), 'alvaro-justen')

    def test_should_accept_other_input_encodings(self):
        # Byte-string input together with the codec needed to decode it.
        slugged_text = slug(u'Álvaro Justen'.encode('utf16'), 'utf16')
        self.assertEqual(slugged_text, 'alvaro-justen')
@imrek
Copy link

imrek commented May 2, 2017

Not sure whether this was intended to support all languages, but it does not work for characters that have no ASCII decomposition: for example, German 'ß', Polish 'ł', and Croatian 'đ' are all dropped, and so on.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment