Created
November 27, 2019 05:25
-
-
Save analyticsindiamagazine/b10e5c1a9d85ce6513c21c1edf356431 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Creates a tf feature spec from the dataframe and columns specified.
def create_feature_spec(df, columns=None):
    """Build a feature spec for ``tf.io.parse_example`` from a dataframe.

    Every selected column becomes a scalar ``FixedLenFeature`` whose dtype
    is chosen from the column's dtype:

    * ``int64`` columns   -> ``tf.int64``
    * ``float64`` columns -> ``tf.float32``
    * anything else       -> ``tf.string``

    Args:
        df: Dataframe whose column dtypes drive the spec.
        columns: Optional list of column names to include. Defaults to
            every column of ``df``.

    Returns:
        Dict mapping column name to ``tf.compat.v1.FixedLenFeature``.
    """
    if columns is None:
        columns = df.columns.values.tolist()

    int64_dtype = np.dtype(np.int64)
    float64_dtype = np.dtype(np.float64)

    feature_spec = {}
    for name in columns:
        col_dtype = df[name].dtype
        # Compare dtypes by equality, not identity: two equal dtypes are
        # not guaranteed to be the very same object.
        if col_dtype == int64_dtype:
            tf_dtype = tf.int64
        elif col_dtype == float64_dtype:
            tf_dtype = tf.float32
        else:
            tf_dtype = tf.string
        feature_spec[name] = tf.compat.v1.FixedLenFeature(shape=(), dtype=tf_dtype)
    return feature_spec
# Creates simple numeric and categorical feature columns from a feature spec and a
# list of columns from that spec to use.
def create_feature_columns(columns, feature_spec, df=None):
    """Create TensorFlow feature columns for the given column names.

    Columns whose spec dtype is ``tf.int64`` or ``tf.float32`` become
    numeric columns. Every other column becomes an indicator column over
    a vocabulary made of the distinct values found in ``df[col]``.

    Args:
        columns: Iterable of column names to create feature columns for.
        feature_spec: Dict mapping column name to a feature object that
            exposes a ``dtype`` attribute.
        df: Dataframe used to derive the vocabulary of categorical
            columns. When omitted, a module-level variable named ``df``
            is used if one exists, which is what earlier versions of this
            function relied on implicitly.

    Returns:
        List of feature columns, in the same order as ``columns``.

    Raises:
        ValueError: If a categorical column is requested and no dataframe
            is available to build its vocabulary from.
    """
    if df is None:
        # Backward compatibility: fall back to the module-level `df`.
        df = globals().get("df")

    feature_columns = []
    for col in columns:
        if feature_spec[col].dtype in (tf.int64, tf.float32):
            feature_columns.append(tf.feature_column.numeric_column(col))
            continue

        if df is None:
            raise ValueError(
                "A dataframe is required to build the vocabulary for "
                "categorical column %r; pass it via the `df` argument." % (col,)
            )
        vocabulary = list(df[col].unique())
        categorical = tf.feature_column.categorical_column_with_vocabulary_list(
            col, vocabulary
        )
        feature_columns.append(tf.feature_column.indicator_column(categorical))
    return feature_columns
def tfexamples_input_fn(examples, feature_spec, label, mode=tf.estimator.ModeKeys.EVAL,
                        num_epochs=None,
                        batch_size=64):
    """Build a ``tf.data.Dataset`` of (features, label) batches from tf.Examples.

    Args:
        examples: Collection of ``tf.train.Example`` protos. It is iterated
            again every time the dataset restarts, so it must be
            re-iterable (e.g. a list), not a one-shot iterator.
        feature_spec: Feature spec passed to ``parse_tf_example``.
        label: Name of the feature that is split off as the target.
        mode: Estimator mode; the data is shuffled only for ``TRAIN``.
        num_epochs: Passed to ``Dataset.repeat``; ``None`` repeats
            indefinitely.
        batch_size: Number of examples per batch.

    Returns:
        Dataset yielding ``(parsed_features, target)`` tuples per batch.
    """
    def ex_generator():
        # Serialize lazily so only one serialized proto is held at a time.
        for example in examples:
            yield example.SerializeToString()

    dataset = tf.data.Dataset.from_generator(
        ex_generator,
        output_types=tf.dtypes.string,
        output_shapes=tf.TensorShape([]),
    )
    if mode == tf.estimator.ModeKeys.TRAIN:
        dataset = dataset.shuffle(buffer_size=2 * batch_size + 1)
    # Batch before parsing: parse_tf_example parses a whole batch of
    # serialized protos in one call.
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(
        lambda tf_example: parse_tf_example(tf_example, label, feature_spec)
    )
    dataset = dataset.repeat(num_epochs)
    return dataset
# Parses Tf.Example protos into features for the input function.
def parse_tf_example(example_proto, label, feature_spec):
    """Parse serialized tf.Example data and split off the label.

    Args:
        example_proto: Serialized example(s) handed to ``tf.io.parse_example``.
        label: Key of the parsed feature to return separately as the target.
        feature_spec: Feature spec describing how to parse each feature.

    Returns:
        Tuple ``(features, target)`` where ``features`` is the parsed
        feature dict with the ``label`` entry removed.
    """
    features = tf.io.parse_example(
        serialized=example_proto,
        features=feature_spec,
    )
    labels = features.pop(label)
    return features, labels
# Converts a dataframe into a list of tf.Example protos.
def df_to_examples(df, columns=None):
    """Convert each dataframe row into a ``tf.train.Example`` proto.

    Column values are written according to the column dtype:

    * ``int64`` columns   -> ``int64_list``
    * ``float64`` columns -> ``float_list``
    * anything else       -> ``bytes_list`` (UTF-8 encoded text)

    Missing values (``None`` / NaN) in non-numeric columns are skipped, so
    the corresponding feature is simply absent from that example.

    Note: rows are read with ``DataFrame.iterrows``, which does not
    preserve per-column dtypes within a row.

    Args:
        df: Dataframe to convert.
        columns: Optional list of column names to include. Defaults to
            every column of ``df``.

    Returns:
        List with one ``tf.train.Example`` per row of ``df``.
    """
    if columns is None:
        columns = df.columns.values.tolist()

    int64_dtype = np.dtype(np.int64)
    float64_dtype = np.dtype(np.float64)

    # Classify each column once instead of re-checking its dtype per row.
    # Dtypes are compared by equality, not identity.
    column_kinds = []
    for col in columns:
        col_dtype = df[col].dtype
        if col_dtype == int64_dtype:
            kind = "int"
        elif col_dtype == float64_dtype:
            kind = "float"
        else:
            kind = "bytes"
        column_kinds.append((col, kind))

    examples = []
    for _, row in df.iterrows():
        example = tf.train.Example()
        for col, kind in column_kinds:
            value = row[col]
            if kind == "int":
                example.features.feature[col].int64_list.value.append(int(value))
            elif kind == "float":
                example.features.feature[col].float_list.value.append(float(value))
            elif value is None or value != value:
                # Missing value (None, or NaN which is != itself): leave
                # the feature out rather than touching the proto map.
                continue
            elif isinstance(value, bytes):
                example.features.feature[col].bytes_list.value.append(value)
            else:
                # str() is a no-op for text and keeps non-text values in
                # string-typed columns from crashing on .encode().
                example.features.feature[col].bytes_list.value.append(
                    str(value).encode('utf-8')
                )
        examples.append(example)
    return examples
# Encodes the label column and returns classes | |
from sklearn.preprocessing import LabelEncoder | |
def make_label_column_numeric(df, label_column, unique_classes):
    """Replace a label column with integer codes, in place.

    Args:
        df: Dataframe holding the label column; it is modified in place.
        label_column: Name of the column to encode.
        unique_classes: The class values the encoder is fitted on.

    Returns:
        The fitted encoder's ``classes_`` attribute.
    """
    encoder = LabelEncoder()
    encoder.fit(unique_classes)
    encoded_labels = encoder.transform(df[label_column])
    df[label_column] = encoded_labels
    return encoder.classes_
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment