Skip to content

Instantly share code, notes, and snippets.

@faried
Created February 6, 2011 18:28
Show Gist options
  • Save faried/813583 to your computer and use it in GitHub Desktop.
Save faried/813583 to your computer and use it in GitHub Desktop.
try harder to parse headers
def utf8decode(thing):
    """Decode a byte string to text, preferring UTF-8.

    Bytes that are not valid UTF-8 are decoded as ISO-8859-15 instead,
    which is what many badly created messages turn out to contain.

    Args:
        thing: a byte string to decode, or a value that is already text.

    Returns:
        The decoded text.  A value that is not a byte string (text that
        was decoded earlier, or None) is returned unchanged.
    """
    if not isinstance(thing, (bytes, bytearray)):
        # Nothing to decode.  Calling .decode() on text would either fail
        # outright or trigger an implicit ASCII round-trip that raises on
        # any non-ASCII character.
        return thing
    try:
        return thing.decode('utf-8')
    except UnicodeDecodeError:
        # many badly created messages will be like this.
        # ISO-8859-15 assigns a character to every one of the 256 byte
        # values, so this fallback cannot raise UnicodeDecodeError and
        # needs no further error handling.
        return thing.decode('iso-8859-15')
def rfc2047decode(name):
    """Decode a header value that may contain RFC 2047 encoded words.

    Use as

        subject = rfc2047decode(message.get('Subject', ''))

    Details at http://tools.ietf.org/html/rfc2047.html

    Each piece returned by decode_header() is decoded with the charset it
    declares.  A piece with no charset, or one whose declared charset is
    unknown or does not match its bytes, is passed to utf8decode() instead.

    Note: does not preserve whitespace.  The decoded pieces are joined
    with a single space, so for example

        Re: [Loungers] =?utf-8?q?Korea?=

    decodes to

        Re: [Loungers] Korea

    however much whitespace separated the pieces in the original header.

    TODO: check if this behavior is appropriate.

    Args:
        name: the raw header value; may be empty or None.

    Returns:
        The decoded text, or `name` itself when it is empty or None.
    """
    if not name:
        return name

    pieces = []
    for raw, charset in decode_header(name):
        if charset:
            try:
                pieces.append(raw.decode(charset))
                continue
            except (UnicodeDecodeError, LookupError) as exc:
                # LookupError: the message names a charset Python does not
                # know.  Treat it like undecodable bytes rather than letting
                # one bad header abort the caller.
                logging.warning('unable to decode %s as %s: %s',
                                raw, charset, str(exc))
        # No usable charset for this piece: best-effort decode.
        pieces.append(utf8decode(raw))
    return u' '.join(pieces)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment