Last active
December 17, 2015 04:29
-
-
Save rafbm/5550827 to your computer and use it in GitHub Desktop.
Fix common downcase/whitespace issues in people names
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: utf-8 | |
require 'minitest/autorun' | |
# `gem install unicode_utils` | |
require 'unicode_utils/downcase' | |
require 'unicode_utils/upcase' | |
# Helpers for tidying up people's names as typed by users: whitespace is
# normalized, and names typed entirely in lower case get each word capitalized.
module HumanName
  # Tells whether +name+ contains no upper-case character.
  #
  # @param name [#to_s] value to inspect; it is converted with +to_s+ first
  # @return [Boolean] true when down-casing the string leaves it unchanged
  def self.all_lower_case?(name)
    text = name.to_s
    text == UnicodeUtils.downcase(text)
  end

  # Cleans up one or several names.
  #
  # Whitespace runs are collapsed to a single space and leading/trailing
  # whitespace is removed. A name with no upper-case character gets the first
  # character of every word upper-cased; a name that already contains an
  # upper-case character is otherwise returned as typed.
  #
  # @param names [Array<Object>] one or more names
  # @return [String, Array<String>] the normalized name when at most one
  #   argument is given, otherwise an array with one normalized name per
  #   argument; anything that is not a String normalizes to ''
  def self.normalize(*names)
    return names.map { |name| normalize(name) } if names.size > 1

    name = names.first
    return '' unless name.is_a?(String)

    # [[:space:]] is Unicode-aware, unlike \s and String#strip, so no-break
    # and ideographic spaces are normalized too. Collapsing first turns any
    # leading/trailing Unicode whitespace into plain spaces that strip removes.
    name = name.gsub(/[[:space:]]+/, ' ').strip
    return name unless all_lower_case?(name)

    # Upper-case every word character that is not preceded by another word
    # character, i.e. the first character of each word.
    name.gsub(/(?<!\p{Word})\p{Word}/) { |initial| UnicodeUtils.upcase(initial) }
  end
end
# Minitest spec for HumanName: lower-case detection, whitespace cleanup,
# capitalization of all-lower-case names, bad input and multi-argument calls.
describe HumanName do
  describe :all_lower_case? do
    it 'works without Unicode characters' do
      assert HumanName.all_lower_case?("uber-rafael o'neil mcmason")
      refute HumanName.all_lower_case?("Uber-Rafael O'Neil McMason")
      refute HumanName.all_lower_case?("UBER-RAFAEL O'NEIL MCMASON")
    end

    it 'works with Unicode characters' do
      assert HumanName.all_lower_case?('über-rafaél o’neil mcmason')
      refute HumanName.all_lower_case?('Über-Rafaél O’Neil McMason')
      refute HumanName.all_lower_case?('ÜBER-RAFAÉL O’NEIL MCMASON')
    end
  end

  describe :normalize do
    it 'strips whitespace' do
      assert_equal 'Rafaél Über-Mason', HumanName.normalize('Rafaél Über-Mason ')
      assert_equal 'Rafaél Über-Mason', HumanName.normalize(' Rafaél Über-Mason ')
      assert_equal 'Rafaél Über-Mason', HumanName.normalize(' Rafaél Über-Mason')
      # NOTE(review): this input equals the expected output; presumably it once
      # contained a run of several spaces between the words — confirm.
      assert_equal 'Rafaél Über-Mason', HumanName.normalize('Rafaél Über-Mason')
      # The tab is written as an escape so it stays visible and cannot be
      # silently turned into a space by editors or copy/paste.
      assert_equal 'Rafaél Über-Mason', HumanName.normalize("Rafaél\tÜber-Mason") # tab, why not
    end

    it 'leaves name untouched as soon as there’s an uppercase character' do
      assert_equal 'Über-Rafaél O’Neil McMason', HumanName.normalize('Über-Rafaél O’Neil McMason')
      assert_equal 'JP Doozle', HumanName.normalize('JP Doozle')
      assert_equal 'OJ moozle', HumanName.normalize('OJ moozle')
      assert_equal 'STEVEN BARZLE', HumanName.normalize('STEVEN BARZLE')
    end

    it 'capitalizes every word when there’s no uppercase character' do
      assert_equal 'Über-Rafaél O’Neil Mcmason', HumanName.normalize('über-rafaél o’neil mcmason')
      assert_equal 'Rafaél Über-Mason', HumanName.normalize('rafaél über-mason')
      assert_equal 'Elize Ballock', HumanName.normalize('elize ballock')
      assert_equal 'Roche', HumanName.normalize('roche')
      assert_equal 'Larry Mckinnon', HumanName.normalize('larry mckinnon')
    end

    it 'returns an empty string on bad input' do
      assert_equal '', HumanName.normalize(' ')
      assert_equal '', HumanName.normalize(nil)
      assert_equal '', HumanName.normalize(0)
      assert_equal '', HumanName.normalize(1)
      assert_equal '', HumanName.normalize(true)
      assert_equal '', HumanName.normalize(false)
      assert_equal '', HumanName.normalize([])
      assert_equal '', HumanName.normalize({})
    end

    it 'returns an array when passed multiple arguments' do
      assert_equal ['Jean', 'de la Fontaine'], HumanName.normalize(' Jean', 'de la Fontaine ')
      assert_equal ['Jean', 'de la Fontaine'], HumanName.normalize('Jean', 'de la Fontaine')
      assert_equal ['Jean', 'De La Fontaine'], HumanName.normalize('jean', 'de la fontaine')
      assert_equal ['Jean', 'de la Fontaine'], HumanName.normalize('jean', 'de la Fontaine')
      assert_equal ['Über-Rafaél', 'O’Neil', 'McMason'], HumanName.normalize('Über-Rafaél', 'O’Neil', 'McMason')
      assert_equal ['Über-Rafaél', 'O’Neil', 'Mcmason'], HumanName.normalize('über-rafaél', 'o’neil', 'mcmason')
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment