anarchivist · April 19, 2010 16:24 · anarchivist · Apr 20, 2010
diff --git a/gistfile1.pyw b/gistfile1.pyw
 class RowDict(dict):
     """Dict whose ``get`` flattens a value into one UTF-8 encoded string.

     Sequence values are joined with ``|`` (empty items are dropped) and
     the result is encoded to UTF-8.  The encoding step exists because
     Python 2's csv library can't handle unicode objects.

     >>> row = RowDict()
     >>> row['bob'] = [u'Montalb\\xe1n, Ricardo', u'', u'Roddenberry, Gene']
     >>> expected = u'Montalb\\xe1n, Ricardo|Roddenberry, Gene'
     >>> row.get('bob') == expected.encode('utf8')
     True
     >>> row.get('nobody')
     ''
     """

     @staticmethod
     def _as_text(item):
         """Return *item* as a unicode string without ever raising on bytes."""
         text_type = type(u'')
         if isinstance(item, text_type):
             return item
         if isinstance(item, bytes):
             # Byte strings are taken to be UTF-8; undecodable bytes become
             # U+FFFD instead of aborting the whole row.
             # NOTE(review): a pymarc.marc8.marc8_to_unicode() conversion was
             # once sketched here but was disabled, so MARC-8 input is NOT
             # transcoded by this class.
             return item.decode('utf8', 'replace')
         return text_type(item)

     def get(self, key, *args):
         """Look up *key* and return its value as UTF-8 encoded bytes.

         Args:
             key: Dictionary key to look up.
             *args: Optional default, forwarded unchanged to ``dict.get``.

         Returns:
             ``''`` when the stored value (or the default) is missing or
             falsy; otherwise the UTF-8 encoding of the value, with the
             truthy items of a sequence value joined by ``|``.
         """
         value = dict.get(self, key, *args)
         if not value:
             return ''
         # Strings are scalars here.  Testing for ``__iter__`` alone is not
         # enough: text strings are iterable on Python 3 and would be split
         # into single characters.
         is_string = isinstance(value, (bytes, type(u'')))
         if is_string or not hasattr(value, '__iter__'):
             items = [value]
         else:
             items = [x for x in value if x]
         # Normalise every piece to text first so byte strings and unicode
         # strings can be mixed freely, then encode exactly once.
         joined = u'|'.join([self._as_text(x) for x in items])
         return joined.encode('utf8')
	class RowDict(dict):
	    """Dict whose ``get`` flattens a value into one UTF-8 encoded string.

	    Sequence values are joined with ``|`` (empty items are dropped) and
	    the result is encoded to UTF-8.  The encoding step exists because
	    Python 2's csv library can't handle unicode objects.

	    >>> row = RowDict()
	    >>> row['bob'] = [u'Montalb\\xe1n, Ricardo', u'', u'Roddenberry, Gene']
	    >>> expected = u'Montalb\\xe1n, Ricardo|Roddenberry, Gene'
	    >>> row.get('bob') == expected.encode('utf8')
	    True
	    >>> row.get('nobody')
	    ''
	    """

	    @staticmethod
	    def _as_text(item):
	        """Return *item* as a unicode string without ever raising on bytes."""
	        text_type = type(u'')
	        if isinstance(item, text_type):
	            return item
	        if isinstance(item, bytes):
	            # Byte strings are taken to be UTF-8; undecodable bytes become
	            # U+FFFD instead of aborting the whole row.
	            # NOTE(review): a pymarc.marc8.marc8_to_unicode() conversion was
	            # once sketched here but was disabled, so MARC-8 input is NOT
	            # transcoded by this class.
	            return item.decode('utf8', 'replace')
	        return text_type(item)

	    def get(self, key, *args):
	        """Look up *key* and return its value as UTF-8 encoded bytes.

	        Args:
	            key: Dictionary key to look up.
	            *args: Optional default, forwarded unchanged to ``dict.get``.

	        Returns:
	            ``''`` when the stored value (or the default) is missing or
	            falsy; otherwise the UTF-8 encoding of the value, with the
	            truthy items of a sequence value joined by ``|``.
	        """
	        value = dict.get(self, key, *args)
	        if not value:
	            return ''
	        # Strings are scalars here.  Testing for ``__iter__`` alone is not
	        # enough: text strings are iterable on Python 3 and would be split
	        # into single characters.
	        is_string = isinstance(value, (bytes, type(u'')))
	        if is_string or not hasattr(value, '__iter__'):
	            items = [value]
	        else:
	            items = [x for x in value if x]
	        # The separator is a bare pipe; normalise every piece to text so
	        # byte strings and unicode strings can be mixed, then encode once.
	        joined = u'|'.join([self._as_text(x) for x in items])
	        return joined.encode('utf8')
No results found