# Wrap each row of the training and validation sets into a BERT InputExample
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None,
                                                                             text_a=x[DATA_COLUMN],
                                                                             text_b=None,
                                                                             label=x[LABEL_COLUMN]), axis=1)

val_InputExamples = val.apply(lambda x: bert.run_classifier.InputExample(guid=None,
                                                                         text_a=x[DATA_COLUMN],
                                                                         text_b=None,
                                                                         label=x[LABEL_COLUMN]), axis=1)
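The InputExamples still have to be converted into the numeric features BERT consumes. A minimal sketch of that step, assuming the tokenizer built in the next snippet and a typical MAX_SEQ_LENGTH of 128 (both values are assumptions, not taken from the original snippet):

MAX_SEQ_LENGTH = 128  # assumption: common maximum sequence length for BERT fine-tuning
label_list = list(train[LABEL_COLUMN].unique())  # assumption: label set taken from the training data

train_features = bert.run_classifier.convert_examples_to_features(
    train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
val_features = bert.run_classifier.convert_examples_to_features(
    val_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)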
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
    """Get the vocab file and casing info from the Hub module."""
    with tf.Graph().as_default():
        bert_module = hub.Module(BERT_MODEL_HUB)
        tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
        with tf.Session() as sess:
            vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                                  tokenization_info["do_lower_case"]])
    return bert.tokenization.FullTokenizer(vocab_file=vocab_file,
                                           do_lower_case=do_lower_case)
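A quick usage check of the tokenizer this function returns (the sample sentence is purely illustrative):

tokenizer = create_tokenizer_from_hub_module()
print(tokenizer.tokenize("This restaurant serves really good pasta"))  # WordPiece tokens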
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
    """Create a classification model on top of the TF Hub BERT module."""
    bert_module = hub.Module(
        BERT_MODEL_HUB,
        trainable=True)  # fine-tune the BERT weights rather than freezing them
    bert_inputs = dict(
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids)
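The snippet above is cut off by the embed; a hedged sketch of how the function body would typically continue in this TF 1.x setup, feeding the pooled [CLS] output into a classification layer (variable names follow the standard TF Hub fine-tuning pattern and are illustrative, not taken from the original):

    bert_outputs = bert_module(
        inputs=bert_inputs,
        signature="tokens",
        as_dict=True)
    output_layer = bert_outputs["pooled_output"]  # [CLS] sentence-level representation
    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)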
FROM tensorflow/tensorflow:1.12.0-py3
ENV LANG=C.UTF-8
RUN mkdir /gpt-2
WORKDIR /gpt-2
ADD . /gpt-2
RUN pip3 install -r requirements.txt
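Assuming this Dockerfile sits at the root of a gpt-2 checkout, the image can typically be built with docker build -t gpt-2 . and then entered with docker run -it gpt-2 bash.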
# Loading the data
train = pd.read_excel("/GD/.../DataSets/Data_Train.xlsx")

# Display the dataframe; head(10) returns the first 10 rows
train.head(10)

# Shape of the table: (number of rows, number of columns)
train.shape

# Display the names of all columns in the dataframe
train.columns
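Since the preprocessing further down fills missing values, it helps to see how many there are first. A small illustrative check:

# Count missing values per column (illustrative sanity check)
train.isnull().sum()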
# A function to find the maximum number of features packed into a single cell
def max_features_in_single_row(train, test, delimiter):
    max_info = 0
    item_lis = list(train.append(test))
    for i in item_lis:
        if len(i.split(delimiter)) > max_info:
            max_info = len(i.split(delimiter))
    print("\n", "-" * 35)
    print("Max_Features in One Observation = ", max_info)
    return max_info
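An illustrative call, assuming a test DataFrame has been loaded the same way as train and that the multi-valued column and its delimiter look like this (the comma delimiter and the use of the test set here are assumptions):

# e.g. how many cuisines can a single restaurant list?
max_cuisines = max_features_in_single_row(train['Cuisines'], test['Cuisines'], delimiter=',')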
# A function to find all the non-numeric values in a series
def non_numerals(series):
    non_numeric = []
    for i in series.unique():
        try:
            float(i)
        except (ValueError, TypeError):
            non_numeric.append(i)
    return non_numeric
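An illustrative check on a column that should be numeric, to see which entries block a straight float conversion (the column name here is a placeholder, not from the original):

print(non_numerals(train['Votes']))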
# Filling nulls/NaNs
train.fillna('NAN', inplace=True)

# Label encoding
from sklearn.preprocessing import LabelEncoder
le_c = LabelEncoder().fit(unique_cuisines)  # fit on the full set of unique cuisine values
train['Cuisines'] = le_c.transform(train['Cuisines'])

# Scaling
cols = [...]  # list of columns to apply the standard scaler to
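A minimal sketch of the scaling step itself, assuming cols has been filled in with the numeric columns to scale:

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(train[cols])
train[cols] = scaler.transform(train[cols])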
# Splitting the training set into training and validation sets
from sklearn.model_selection import train_test_split
train, val = train_test_split(train_sample, test_size=0.1, random_state=123)

# Separating the independent and dependent variables
cols = [...]  # list of column names in order; the last column is y, the target variable

# Training set
X_train = train[cols[:-1]]
Y_train = train[cols[-1]]

# Validation set
X_val = val[cols[:-1]]
Y_val = val[cols[-1]]
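A quick shape check to confirm the split came out as expected (illustrative):

print(X_train.shape, Y_train.shape)
print(X_val.shape, Y_val.shape)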