Created
June 3, 2011 21:03
-
-
Save lukaszb/1007169 to your computer and use it in GitHub Desktop.
username & email extractor for VCS & RhodeCode
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def author_email(author):
    """
    Return the email address found in the given author string.

    If the string contains a ``<`` followed by a ``>``, the stripped text
    between them is returned as-is (it is not validated as an email).
    If either sign is missing, it falls back to a regex search and returns
    the first match, or an empty string when nothing matches.

    Regex taken from http://www.regular-expressions.info/email.html

    :param author: author string, e.g. ``'Name <user@example.com>'``
    :returns: the email address, or ``''`` if none could be found
    """
    import re

    start = author.find('<')
    # Look for the closing sign only *after* the opening one, so a stray '>'
    # earlier in the string cannot produce an inverted (empty) slice.
    end = author.find('>', start + 1) if start != -1 else -1

    if start == -1 or end == -1:
        # fallback to regex match of email out of a string
        email_re = re.compile(
            r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!"
            r"#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z"
            r"0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]"
            r"*[a-z0-9])?", re.IGNORECASE)
        match = email_re.search(author)
        return match.group(0) if match else ''

    return author[start + 1:end].strip()
def author_name(author):
    """
    Return the name part of an author string, or else the username.

    A string without ``@`` is returned untouched. Otherwise the email
    reported by ``author_email()`` is cut out of the string, any ``<`` and
    ``>`` signs are dropped and the surrounding whitespace is stripped.
    """
    if '@' not in author:
        return author

    name = author.replace(author_email(author), '')
    for sign in ('<', '>'):
        name = name.replace(sign, '')
    return name.strip()
#============================================================================== | |
# TESTS | |
#============================================================================== | |
# Each entry is (raw author string, (expected name, expected email)).
# The addresses are example.com stand-ins: they only need to be well-formed
# so that both the '<...>' path and the regex fallback can be exercised.
test_usernames = [
    ('Marcin Kuzminski <marcin@example.com>',
     ('Marcin Kuzminski', 'marcin@example.com')),
    ('Marcin Kuzminski Spaces < marcin@example.com >',
     ('Marcin Kuzminski Spaces', 'marcin@example.com')),
    ('Marcin Kuzminski <marcin.kuzminski@example.com>',
     ('Marcin Kuzminski', 'marcin.kuzminski@example.com')),
    ('mrf RFC_SPEC <marcin+kuzminski@example.com>',
     ('mrf RFC_SPEC', 'marcin+kuzminski@example.com')),
    ('username <user@example.com>',
     ('username', 'user@example.com')),
    # missing closing '>' -> regex fallback
    ('username <user@example.com',
     ('username', 'user@example.com')),
    # no angle brackets at all -> regex fallback
    ('broken missing@example.com',
     ('broken', 'missing@example.com')),
    ('<justemail@example.com>',
     ('', 'justemail@example.com')),
    ('justname',
     ('justname', '')),
    ('Mr Double Name withemail@example.com ',
     ('Mr Double Name', 'withemail@example.com')),
]
def test_author_email():
    """Check ``author_email()`` against every case in ``test_usernames``."""
    for raw_author, (_name, expected_email) in test_usernames:
        assert expected_email == author_email(raw_author)
def test_author_name():
    """Check ``author_name()`` against every case in ``test_usernames``."""
    for raw_author, (expected_name, _email) in test_usernames:
        assert expected_name == author_name(raw_author)
# Run both test functions, in order, when executed as a script.
if __name__ == '__main__':
    for run_test in (test_author_email, test_author_name):
        run_test()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment