Skip to content

Instantly share code, notes, and snippets.

@halegreen
Last active November 22, 2017 08:29
Show Gist options
  • Save halegreen/1c6f400a505996385ce69b18a69d8348 to your computer and use it in GitHub Desktop.
Save halegreen/1c6f400a505996385ce69b18a69d8348 to your computer and use it in GitHub Desktop.
Transform pandas DataFrame columns into the libffm (field, feature, value) data format
## for category columns
def _is_nan(value):
    """Return True for NaN-like scalars, i.e. values not equal to themselves."""
    try:
        return bool(value != value)
    except (TypeError, ValueError):
        # Values whose comparison has no plain truth value (e.g. pandas.NA)
        # are hashable singletons and can be used as ordinary dict keys.
        return False


def category_feature2FFM(data, category_list):
    """Encode categorical columns as FFM ``(field, feature, value)`` triples.

    For every column named in ``category_list`` each cell is replaced by the
    tuple ``(field_id, feature_id, 1)``:

    * ``field_id`` is the position of the column inside ``category_list``;
    * ``feature_id`` is the first-seen position of the cell's value among the
      column's unique values, shifted by the number of unique values of all
      previously processed columns, so ids never overlap between columns.

    NaN-like values are treated as one category of their own. They cannot be
    looked up in a dict because NaN never compares equal to itself, so their
    position is tracked separately.

    Args:
        data: pandas DataFrame holding the columns; it is modified in place.
        category_list: sequence of column names to encode, in field order.

    Returns:
        The same ``data`` object, with the listed columns replaced by tuples.
    """
    previous_len = 0
    for field_id, category_name in enumerate(category_list):
        uniques = data[category_name].unique()

        positions = {}
        nan_position = None
        for position, value in enumerate(uniques):
            if _is_nan(value):
                if nan_position is None:
                    nan_position = position
            else:
                positions[value] = position

        # Default arguments freeze the per-column state for this helper.
        def data2ffm(x, field_id=field_id, positions=positions,
                     nan_position=nan_position, offset=previous_len):
            position = nan_position if _is_nan(x) else positions[x]
            return (field_id, position + offset, 1)

        data[category_name] = data[category_name].map(data2ffm)
        # NaN occupies a slot too, so advance by the full unique count.
        previous_len += len(uniques)
        print('%s转换完成' % category_name)
    return data
## for numerical columns
def numeric_feature2FFM(data, numeric_list, start_num):
    """Min-max scale numeric columns and encode them as FFM triples.

    Every column named in ``numeric_list`` is rescaled to the range [0, 1]
    and each cell is then replaced by ``(field_id, 1, scaled_value)``, where
    ``field_id`` is ``start_num`` plus the column's position in
    ``numeric_list``.

    The scaling is done with pandas arithmetic on the column itself, so the
    function needs no scaler class in scope and has no 2-D input requirement.
    A column with zero range (all values equal) is mapped to 0.0 rather than
    dividing by zero. NaN cells are ignored when computing the minimum and
    maximum and stay NaN in the output.

    Args:
        data: pandas DataFrame holding the columns; it is modified in place.
        numeric_list: sequence of numeric column names to encode.
        start_num: field id assigned to the first column of ``numeric_list``.

    Returns:
        The same ``data`` object, with the listed columns replaced by tuples.
    """
    for offset, numeric_name in enumerate(numeric_list):
        field_id = start_num + offset

        column = data[numeric_name].astype(float)
        lowest = column.min()
        span = column.max() - lowest
        # Constant, empty or all-NaN column: fall back to a unit divisor.
        if not span > 0:
            span = 1.0
        scaled = (column - lowest) / span

        # Default argument freezes the field id for this column's helper.
        def numeric2ffm(x, field_id=field_id):
            return (field_id, 1, x)

        data[numeric_name] = scaled.map(numeric2ffm)
        print('%s转换完成' % numeric_name)
    return data
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment