idan · October 8, 2014 13:13 · cmcaine · Oct 22, 2017
diff --git a/gistfile1.py b/gistfile1.py
 def explode_hstore(df, column):
     """Explode a column of PostgreSQL hstore text into one column per key.

     Each cell of ``df[column]`` is expected to look like:

     "foo"=>"bar", "baz"=>"quux", ...

     Keys and values may contain any characters, including spaces, commas and
     backslash-escaped quotes; an unquoted NULL value is read as NaN.

     Parameters:
         df: DataFrame holding the hstore column.
         column: label of the column in ``df`` to parse.

     Returns:
         A new DataFrame with the columns of ``df`` followed by one column per
         hstore key, in sorted key order. Rows that lack a key, and cells that
         are not strings, get NaN in that key's column.

     Raises:
         ValueError: if an hstore key has the same name as a column of ``df``.
     """
     # One "key"=>"value" (or "key"=>NULL) pair. Quoted parts may contain
     # backslash escapes, so the cell is scanned with a single regex instead
     # of being split on ', ' (which breaks on values that contain commas).
     pair_re = re.compile(
         r'"(?P<key>(?:[^"\\]|\\.)*)"\s*=>\s*'
         r'(?:"(?P<val>(?:[^"\\]|\\.)*)"|NULL)',
         re.IGNORECASE | re.DOTALL
     )
     escape_re = re.compile(r'\\(.)', re.DOTALL)
     missing = float("nan")

     # Parse every cell into a {key: value} dict, one dict per row, in order.
     rows = []
     for cell in df[column]:
         pairs = {}
         if isinstance(cell, str):
             for match in pair_re.finditer(cell):
                 key = escape_re.sub(r"\1", match.group("key"))
                 val = match.group("val")
                 if val is None:
                     # the NULL alternative matched, so there is no value
                     val = missing
                 else:
                     val = escape_re.sub(r"\1", val)
                 # setdefault keeps the first occurrence of a repeated key
                 pairs.setdefault(key, val)
         rows.append(pairs)

     keys = sorted({key for pairs in rows for key in pairs})
     if not keys:
         return df.copy()

     clashes = [key for key in keys if key in df.columns]
     if clashes:
         raise ValueError(
             "hstore keys overlap existing columns: {}".format(", ".join(clashes))
         )

     # Build the key columns positionally and reuse df's own index, so rows
     # stay aligned one-to-one even when index labels are not unique.
     exploded = pd.DataFrame(
         {key: [pairs.get(key, missing) for pairs in rows] for key in keys},
         index=df.index
     )
     return pd.concat([df, exploded], axis=1)
	def explode_hstore(df, column):
		"""Explode a column of PostgreSQL hstore text into one column per key.

		Each cell of ``df[column]`` is expected to look like:

		"foo"=>"bar", "baz"=>"quux", ...

		Keys and values may contain any characters, including spaces, commas and
		backslash-escaped quotes; an unquoted NULL value is read as NaN.

		Parameters:
			df: DataFrame holding the hstore column.
			column: label of the column in ``df`` to parse.

		Returns:
			A new DataFrame with the columns of ``df`` followed by one column per
			hstore key, in sorted key order. Rows that lack a key, and cells that
			are not strings, get NaN in that key's column.

		Raises:
			ValueError: if an hstore key has the same name as a column of ``df``.
		"""
		# One "key"=>"value" (or "key"=>NULL) pair. Quoted parts may contain
		# backslash escapes, so the cell is scanned with a single regex instead
		# of being split on ', ' (which breaks on values that contain commas).
		pair_re = re.compile(
			r'"(?P<key>(?:[^"\\]|\\.)*)"\s*=>\s*'
			r'(?:"(?P<val>(?:[^"\\]|\\.)*)"|NULL)',
			re.IGNORECASE | re.DOTALL
		)
		escape_re = re.compile(r'\\(.)', re.DOTALL)
		missing = float("nan")

		# Parse every cell into a {key: value} dict, one dict per row, in order.
		rows = []
		for cell in df[column]:
			pairs = {}
			if isinstance(cell, str):
				for match in pair_re.finditer(cell):
					key = escape_re.sub(r"\1", match.group("key"))
					val = match.group("val")
					if val is None:
						# the NULL alternative matched, so there is no value
						val = missing
					else:
						val = escape_re.sub(r"\1", val)
					# setdefault keeps the first occurrence of a repeated key
					pairs.setdefault(key, val)
			rows.append(pairs)

		keys = sorted({key for pairs in rows for key in pairs})
		if not keys:
			return df.copy()

		clashes = [key for key in keys if key in df.columns]
		if clashes:
			raise ValueError(
				"hstore keys overlap existing columns: {}".format(", ".join(clashes))
			)

		# Build the key columns positionally and reuse df's own index, so rows
		# stay aligned one-to-one even when index labels are not unique.
		exploded = pd.DataFrame(
			{key: [pairs.get(key, missing) for pairs in rows] for key in keys},
			index=df.index
		)
		return pd.concat([df, exploded], axis=1)