Skip to content

Instantly share code, notes, and snippets.

# Prepare the Google Fonts metadata table: load the JSON listing, keep the
# columns of interest, normalise family names, and one-hot encode the
# multi-valued ('variants', 'subsets') and single-valued ('category') fields.
import matplotlib.pyplot as plt
from sklearn.preprocessing import MultiLabelBinarizer
import pandas as pd
import numpy as np

# option 1. dynamically download JSON
# url = 'https://www.googleapis.com/webfonts/v1/webfonts?key='
# key = 'YOUR-API-KEY'
# data = pd.read_json(url+key, orient='')
# option 2. use JSON already downloaded (replace with your own file path)
df = pd.read_json('../../input/fonts-master.json')
# df.head()

# flatten the JSON hierarchy (easier to handle this way)
# NOTE(review): no flattening code is present in this excerpt; the steps
# below assume `df` already exposes 'family', 'variants', 'subsets' and
# 'category' as top-level columns -- confirm against the input file.

# select only the columns we need
cols = ['family', 'variants', 'subsets', 'category']
# .copy() gives an independent frame, so the column assignment below does not
# operate on a slice of the loaded data.
df = df[cols].copy()
# df.head(5)

# Remove any space from family string so that it matches the file name convention.
df.family = [name.replace(' ', '') for name in df.family]
df.head(5)

mlb = MultiLabelBinarizer()
# one-hot encoding: variant columns keep their raw class names, subset columns
# get a 'subsets_' prefix, category dummies get a 'category_' prefix.
# `mlb` is re-fitted by each fit_transform call, so `mlb.classes_` always
# refers to the column that was just encoded.
df = df.join(pd.DataFrame(mlb.fit_transform(df.pop('variants')),
                          columns=list(mlb.classes_),
                          index=df.index))
df = df.join(pd.DataFrame(mlb.fit_transform(df.pop('subsets')),
                          columns=['subsets_' + x for x in mlb.classes_],
                          index=df.index))
df = df.join(pd.get_dummies(df['category'], prefix="category")).drop(['category'], axis=1)