Skip to content

Instantly share code, notes, and snippets.

@analyticsindiamagazine
Created November 27, 2019 05:25
Show Gist options
  • Save analyticsindiamagazine/b10e5c1a9d85ce6513c21c1edf356431 to your computer and use it in GitHub Desktop.
Save analyticsindiamagazine/b10e5c1a9d85ce6513c21c1edf356431 to your computer and use it in GitHub Desktop.
def create_feature_spec(df, columns=None):
    """Create a tf.Example parsing spec from a dataframe's column dtypes.

    Args:
        df: DataFrame whose column dtypes decide each feature's type.
        columns: Iterable of column names to include. When None, every
            column of ``df`` is used.

    Returns:
        Dict mapping each column name to a scalar ``FixedLenFeature``:
        int64 columns map to ``tf.int64``, float64 columns to
        ``tf.float32``, and every other dtype to ``tf.string``.
    """
    # Identity check: `columns == None` is evaluated element-wise when a
    # NumPy array or pandas Index is passed, which makes the `if` ambiguous.
    if columns is None:
        columns = df.columns.values.tolist()

    int64_dtype = np.dtype(np.int64)
    float64_dtype = np.dtype(np.float64)

    feature_spec = {}
    for name in columns:
        column_dtype = df[name].dtype
        # Compare dtypes by equality; `is` only works as long as NumPy
        # happens to hand back the same cached dtype object.
        if column_dtype == int64_dtype:
            tf_dtype = tf.int64
        elif column_dtype == float64_dtype:
            # tf.Example float features are stored as 32-bit floats.
            tf_dtype = tf.float32
        else:
            tf_dtype = tf.string
        feature_spec[name] = tf.compat.v1.FixedLenFeature(shape=(), dtype=tf_dtype)
    return feature_spec
def create_feature_columns(columns, feature_spec, df=None):
    """Create simple numeric and categorical feature columns from a spec.

    Args:
        columns: Iterable of column names (keys of ``feature_spec``) to use.
        feature_spec: Dict mapping column name to a feature whose ``dtype``
            attribute decides the kind of feature column created.
        df: DataFrame supplying the vocabulary (unique values) for
            non-numeric columns. When omitted, a module-level variable
            named ``df`` is used, which is what this function relied on
            before the parameter existed.

    Returns:
        List of feature columns: a numeric column for ``tf.int64`` /
        ``tf.float32`` features, and an indicator column over a vocabulary
        list for everything else.

    Raises:
        NameError: If a categorical column is requested, ``df`` was not
            passed, and no module-level ``df`` exists.
    """
    feature_columns = []
    for col in columns:
        dtype = feature_spec[col].dtype
        if dtype == tf.int64 or dtype == tf.float32:
            feature_columns.append(tf.feature_column.numeric_column(col))
            continue

        if df is None:
            # Backward compatibility: fall back to the global dataframe only
            # when a vocabulary is actually needed.
            try:
                df = globals()["df"]
            except KeyError:
                raise NameError("name 'df' is not defined") from None

        vocabulary = list(df[col].unique())
        categorical = tf.feature_column.categorical_column_with_vocabulary_list(
            col, vocabulary)
        feature_columns.append(tf.feature_column.indicator_column(categorical))
    return feature_columns
def tfexamples_input_fn(examples, feature_spec, label, mode=tf.estimator.ModeKeys.EVAL,
                        num_epochs=None,
                        batch_size=64):
    """Build a ``tf.data.Dataset`` of (features, label) batches from examples.

    Args:
        examples: Sequence of protos exposing ``SerializeToString()``.
        feature_spec: Parsing spec handed to ``parse_tf_example``.
        label: Name of the feature split off as the target.
        mode: Estimator mode; the dataset is shuffled only for TRAIN.
        num_epochs: Passed to ``Dataset.repeat`` (None repeats indefinitely).
        batch_size: Number of serialized examples per batch.

    Returns:
        Dataset yielding ``(parsed_features, target)`` pairs per batch.
    """
    def serialized_examples():
        for example in examples:
            yield example.SerializeToString()

    def parse_batch(serialized_batch):
        return parse_tf_example(serialized_batch, label, feature_spec)

    dataset = tf.data.Dataset.from_generator(
        serialized_examples, tf.dtypes.string, tf.TensorShape([]))
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Shuffle individual examples before they are grouped into batches.
        dataset = dataset.shuffle(buffer_size=2 * batch_size + 1)
    # Parsing runs on whole batches of serialized strings.
    return dataset.batch(batch_size).map(parse_batch).repeat(num_epochs)
def parse_tf_example(example_proto, label, feature_spec):
    """Parse serialized tf.Example protos into features for the input function.

    Args:
        example_proto: Serialized example(s) passed to ``tf.io.parse_example``.
        label: Key of the parsed feature to return as the target.
        feature_spec: Parsing spec describing the features to extract.

    Returns:
        Tuple ``(features, target)`` where ``features`` is the parsed dict
        with the ``label`` entry removed and ``target`` is that entry.
    """
    features = tf.io.parse_example(serialized=example_proto, features=feature_spec)
    labels = features.pop(label)
    return features, labels
def df_to_examples(df, columns=None):
    """Convert a dataframe into a list of tf.Example protos, one per row.

    Args:
        df: DataFrame to convert.
        columns: Iterable of column names to serialize. When None, every
            column of ``df`` is used.

    Returns:
        List of ``tf.train.Example`` protos in row order. int64 columns are
        written to ``int64_list``, float64 columns to ``float_list``, and
        all other columns are UTF-8 encoded into ``bytes_list``. Missing
        values (None or NaN) in those other columns are left out of the
        example.
    """
    # Identity check: `columns == None` is evaluated element-wise when a
    # NumPy array or pandas Index is passed, which makes the `if` ambiguous.
    if columns is None:
        columns = df.columns.values.tolist()

    int64_dtype = np.dtype(np.int64)
    float64_dtype = np.dtype(np.float64)

    # Resolve each column's kind and values once. Reading per column (rather
    # than per row via iterrows) keeps int64 values exact: iterrows upcasts a
    # mixed int/float row to float64, which corrupts integers above 2**53.
    column_data = []
    for col in columns:
        series = df[col]
        if series.dtype == int64_dtype:
            kind = "int"
        elif series.dtype == float64_dtype:
            kind = "float"
        else:
            kind = "bytes"
        column_data.append((col, kind, series.tolist()))

    examples = []
    for row_idx in range(len(df)):
        example = tf.train.Example()
        for col, kind, values in column_data:
            value = values[row_idx]
            if kind == "int":
                example.features.feature[col].int64_list.value.append(int(value))
            elif kind == "float":
                example.features.feature[col].float_list.value.append(value)
            elif value is None or value != value:
                # Missing value (None, or NaN which never equals itself):
                # leave the feature out instead of failing on `.encode`.
                continue
            else:
                example.features.feature[col].bytes_list.value.append(
                    value.encode('utf-8'))
        examples.append(example)
    return examples
# Encodes the label column and returns classes
from sklearn.preprocessing import LabelEncoder
def make_label_column_numeric(df, label_column, unique_classes):
    """Encode the label column as integers, modifying ``df`` in place.

    Args:
        df: DataFrame whose ``label_column`` is overwritten with the codes.
        label_column: Name of the column holding the labels.
        unique_classes: Class values the encoder is fitted on.

    Returns:
        The fitted encoder's ``classes_`` attribute.
    """
    encoder = LabelEncoder()
    encoder.fit(unique_classes)
    df[label_column] = encoder.transform(df[label_column])
    return encoder.classes_
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment