Skip to content

Instantly share code, notes, and snippets.

View analyticsindiamagazine's full-sized avatar
:octocat:

Analytics India Magazine analyticsindiamagazine

:octocat:
View GitHub Profile
def _row_to_input_example(row):
    # Convert one DataFrame row into the InputExample structure that
    # bert.run_classifier expects.
    return bert.run_classifier.InputExample(
        guid=None,                  # unique example id; not needed here
        text_a=row[DATA_COLUMN],    # the text being classified
        text_b=None,                # single-sequence task: no sentence pair
        label=row[LABEL_COLUMN],    # target label for this example
    )

# Build InputExamples for the training and validation splits.
train_InputExamples = train.apply(_row_to_input_example, axis=1)
val_InputExamples = val.apply(_row_to_input_example, axis=1)
# This is a path to an uncased (all lowercase) version of BERT on TF Hub.
# Per the module name: L-12 / H-768 / A-12 — presumably 12 layers, 768 hidden
# units, 12 attention heads (standard BERT-Base naming).
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"
def create_tokenizer_from_hub_module():
    """Get the vocab file and casing info from the Hub module."""
    # Use a throwaway graph/session solely to read the module's
    # tokenization metadata; nothing here is part of the training graph.
    with tf.Graph().as_default():
        bert_module = hub.Module(BERT_MODEL_HUB)
        tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
        with tf.Session() as sess:
            # NOTE(review): this function is truncated by the page scrape —
            # the sess.run argument list is cut off mid-statement and the
            # rest of the function (building the FullTokenizer) is missing.
            vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
    """Build the BERT classification model from the TF-Hub module.

    Args:
        is_predicting: presumably toggles train vs. inference paths — the
            branch using it is cut off below; confirm against the full gist.
        input_ids, input_mask, segment_ids: BERT's standard three input
            tensors (token ids, padding mask, sentence-segment ids).
        labels: target labels for the batch.
        num_labels: number of output classes.
    """
    # Load BERT with trainable=True so its weights are fine-tuned.
    bert_module = hub.Module(
        BERT_MODEL_HUB,
        trainable=True)
    bert_inputs = dict(
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids)
    # NOTE(review): truncated by the page scrape — the call to bert_module,
    # the output layer, and the loss computation are missing here.
FROM tensorflow/tensorflow:1.12.0-py3

# UTF-8 locale so Python handles non-ASCII text correctly inside the container.
ENV LANG=C.UTF-8

# WORKDIR creates /gpt-2 if it does not exist, so the former `RUN mkdir` is
# redundant and has been dropped.
WORKDIR /gpt-2

# COPY is preferred over ADD for plain local files: ADD's extra behaviors
# (URL fetch, tar auto-extraction) are not needed and can surprise.
COPY . /gpt-2

RUN pip3 install -r requirements.txt
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Loading Data from an Excel file (path is on a mounted Google Drive).
train = pd.read_excel("/GD/.../DataSets/Data_Train.xlsx")
# Display the dataframe; head(10) returns the first 10 rows
train.head(10)
# Shape of a table (number of rows, number of columns)
train.shape
# Display the names of all columns in a dataframe
# NOTE(review): the statement this comment describes (e.g. train.columns)
# appears to have been lost in the page scrape.
# A function to find the maximum number of features in a single cell
def max_features_in_single_row(train, test, delimiter):
    """Return the largest count of delimiter-separated items in any one cell.

    Args:
        train, test: iterables of strings (e.g. pandas Series columns).
        delimiter: string separating individual features inside a cell.

    Returns:
        int: the maximum item count over all cells (also printed for logging).
    """
    max_info = 0
    # list(a) + list(b) replaces the original Series.append, which was
    # deprecated and removed in pandas 2.0; it also accepts plain lists.
    for cell in list(train) + list(test):
        # Split each cell once (the original split twice per hit) and the
        # pointless "{}".format(delimiter) wrapper is dropped.
        n_items = len(cell.split(delimiter))
        if n_items > max_info:
            max_info = n_items
    print("\n", "-"*35)
    print("Max_Features in One Observation = ", max_info)
    return max_info
# A function to find all the non numeric values
def non_numerals(series):
    """Return the unique values of *series* that cannot be parsed as floats.

    Args:
        series: a pandas Series (anything exposing a .unique() method).

    Returns:
        list: unique non-numeric values, in order of first appearance.
    """
    # Renamed the result list: the original shadowed the function's own name.
    result = []
    for value in series.unique():
        try:
            float(value)  # probing parseability only; result discarded
        except (ValueError, TypeError):
            # Catch only conversion failures — the original bare `except:`
            # would also swallow KeyboardInterrupt/SystemExit.
            result.append(value)
    return result
# Filling Nulls/Nans with the literal string 'NAN' as a sentinel category
train.fillna('NAN', inplace = True)
# Label Encoding
from sklearn.preprocessing import LabelEncoder
le_c = LabelEncoder().fit(unique_cuisines)
# NOTE(review): the encoder fitted above is bound to le_c, but the transform
# below calls le_l — almost certainly a typo; one of the two names is wrong.
train['Cuisines'] = le_l.transform(train['Cuisines'])
# Scaling
# NOTE(review): the next line is placeholder pseudo-code, not valid Python —
# it must be replaced with an actual list of column names before running.
cols = [list of columns to apply standard scaler]
# Splitting training set into training and validation sets (90/10 split),
# with a fixed random_state for reproducibility
from sklearn.model_selection import train_test_split
train, val = train_test_split(train_sample, test_size = 0.1, random_state = 123)
# Separating the independent and dependent variables
# NOTE(review): the next line is placeholder pseudo-code, not valid Python.
cols = [list of column names in order] #last column corresponds to y or target variable
# Training set: every column except the last is a feature; the last is the target
X_train = train[cols[:-1]]
Y_train = train[cols[-1]]
# Validation set
# NOTE(review): the X_val/Y_val statements appear to be cut off by the scrape.