louisguitton · March 20, 2019 13:34
diff --git a/tag_counts.py b/tag_counts.py
 import pandas as pd

 # in the end, I went for a recursive function because tags can be arbitrarily nested
 def extract_from_dict(var, key):
     """Yield every value stored under ``key`` anywhere inside ``var``.

     ``var`` is walked recursively: dicts are searched key by key and
     lists element by element, so matches at any nesting depth are found.
     Anything that is neither a dict nor a list yields nothing.

     Args:
         var: The (possibly nested) dict/list structure to search.
         key: The dictionary key whose values should be collected.

     Yields:
         Each value found under ``key``, in traversal order. A matching
         value that is itself a dict or list is yielded and then also
         searched for further matches.
     """
     if isinstance(var, dict):
         for k, v in var.items():
             if k == key:
                 yield v
             # Descend regardless of whether this key matched: containers
             # may hold more occurrences of ``key`` deeper down.
             if isinstance(v, (dict, list)):
                 # Recurse into this function itself; the previous call
                 # targeted an undefined name and raised NameError.
                 yield from extract_from_dict(v, key)
     elif isinstance(var, list):
         for item in var:
             yield from extract_from_dict(item, key)
            
 # Load the segments and gather, per segment_id, the list of every 'tag'
 # value found inside that segment's criteria.
 segments = pd.read_json('segments.json')
 criteria_by_segment = segments.set_index('segment_id').criteria
 tag_generators = criteria_by_segment.apply(lambda x: extract_from_dict(x, 'tag'))
 tags_arrays = tag_generators.apply(list)

 # Spread each list across columns, then stack the columns back into rows
 # and expose segment_id as a regular column next to 'tag'.
 tags_wide = tags_arrays.apply(pd.Series)
 tags = tags_wide.stack().rename('tag').reset_index(level=0)

 # Count rows per tag and sort by that count, largest first.
 counts_per_tag = tags.groupby('tag').count()
 tag_counts = counts_per_tag.sort_values('segment_id', ascending=False)
 tag_counts.to_csv('tag_counts.csv')
	import pandas as pd

	# in the end, I went for a recursive function because tags can be arbitrarily nested
	def extract_from_dict(var, key):
	    """Yield every value stored under ``key`` anywhere inside ``var``.

	    ``var`` is walked recursively: dicts are searched key by key and
	    lists element by element, so matches at any nesting depth are found.
	    Anything that is neither a dict nor a list yields nothing.

	    Args:
	        var: The (possibly nested) dict/list structure to search.
	        key: The dictionary key whose values should be collected.

	    Yields:
	        Each value found under ``key``, in traversal order. A matching
	        value that is itself a dict or list is yielded and then also
	        searched for further matches.
	    """
	    if isinstance(var, dict):
	        for k, v in var.items():
	            if k == key:
	                yield v
	            # Descend regardless of whether this key matched: containers
	            # may hold more occurrences of ``key`` deeper down.
	            if isinstance(v, (dict, list)):
	                # Recurse into this function itself; the previous call
	                # targeted an undefined name and raised NameError.
	                yield from extract_from_dict(v, key)
	    elif isinstance(var, list):
	        for item in var:
	            yield from extract_from_dict(item, key)

	# Load the segments and gather, per segment_id, the list of every 'tag'
	# value found inside that segment's criteria.
	segments = pd.read_json('segments.json')
	criteria_by_segment = segments.set_index('segment_id').criteria
	tag_generators = criteria_by_segment.apply(lambda x: extract_from_dict(x, 'tag'))
	tags_arrays = tag_generators.apply(list)

	# Spread each list across columns, then stack the columns back into rows
	# and expose segment_id as a regular column next to 'tag'.
	tags_wide = tags_arrays.apply(pd.Series)
	tags = tags_wide.stack().rename('tag').reset_index(level=0)

	# Count rows per tag and sort by that count, largest first.
	counts_per_tag = tags.groupby('tag').count()
	tag_counts = counts_per_tag.sort_values('segment_id', ascending=False)
	tag_counts.to_csv('tag_counts.csv')
No results found