.es(index=apa*,q=geoip.country_code2:FR,metric=sum:bytes).label("France").divide(.es(index=apa*, q=geoip.country_code2:*,metric=sum:bytes)).multiply(100).yaxis(units="custom::%"), .es(index=apa*,q=geoip.country_code2:DE,metric=sum:bytes).label("Germany").divide(.es(index=apa*, q=geoip.country_code2:*,metric=sum:bytes)).multiply(100).yaxis(units="custom::%")
## importing dataset
from google.colab import drive
drive.mount('gdrive', force_remount=True)

## configure root folder on your gdrive
data_dir = 'gdrive/My Drive/DAIR RESOURCES/TF to PT/datasets/hymenoptera_data'
## custom transformer to flatten the image tensors
class ReshapeTransform:
    def __init__(self, new_size):
        self.new_size = new_size

    def __call__(self, img):
        # reshape the image tensor to the requested size and return it
        return torch.reshape(img, self.new_size)
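The snippets below read from train_dataset, which is not defined in this excerpt. A minimal sketch of how it could be built (assuming the standard hymenoptera_data train/ folder layout and 224x224 images, consistent with the reshape used below) is:

## sketch: build train_dataset from the hymenoptera_data folder (assumed setup, not shown above)
import torch
import torchvision
from torchvision import transforms

preprocess = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    ReshapeTransform((-1,))  # flatten each image into a single vector
])

train_set = torchvision.datasets.ImageFolder(root=data_dir + '/train', transform=preprocess)

## load the whole split as one batch so next(iter(train_dataset)) returns all examples
train_dataset = torch.utils.data.DataLoader(train_set, batch_size=len(train_set), shuffle=True)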
## load the entire dataset
x, y = next(iter(train_dataset))

## print one example
dim = x.shape[1]
print("Dimension of image:", x.shape, "\n",
      "Dimension of labels", y.shape)

plt.imshow(x[160].reshape(1, 3, 224, 224).squeeze().T.numpy())
class LR(nn.Module):
    def __init__(self, dim, lr=torch.scalar_tensor(0.01)):
        super(LR, self).__init__()
        # initialize parameters
        self.w = torch.zeros(dim, 1, dtype=torch.float).to(device)
        self.b = torch.scalar_tensor(0).to(device)
        self.grads = {"dw": torch.zeros(dim, 1, dtype=torch.float).to(device),
                      "db": torch.scalar_tensor(0).to(device)}
        self.lr = lr.to(device)
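Only the constructor of LR is shown above. The forward pass, gradient computation, and parameter update used by the training run below would sit inside the same class; a rough sketch (the method names and formulas are assumptions based on standard logistic-regression math, not the original code) looks like:

    ## sketch of the remaining LR methods (assumed, not shown in the excerpt above)
    def forward(self, x):
        # forward pass: sigmoid(w^T x + b), where x has shape (dim, m)
        z = torch.mm(self.w.T, x) + self.b
        return self.sigmoid(z)

    def sigmoid(self, z):
        return 1 / (1 + torch.exp(-z))

    def backward(self, x, yhat, y):
        # gradients of the binary cross-entropy loss w.r.t. w and b
        m = x.shape[1]
        self.grads["dw"] = (1 / m) * torch.mm(x, (yhat - y).T)
        self.grads["db"] = (1 / m) * torch.sum(yhat - y)

    def optimize(self):
        # plain gradient-descent update
        self.w = self.w - self.lr * self.grads["dw"]
        self.b = self.b - self.lr * self.grads["db"]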
## model pretesting
x, y = next(iter(train_dataset))

## flatten/transform the data
x_flatten = x.T
y = y.unsqueeze(0)

## num_px is the dimension of the images
dim = x_flatten.shape[0]
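The training run below also needs a cost function, which is not shown in this excerpt. A minimal binary cross-entropy sketch (an assumption, but consistent with the initial cost of about 0.693 = ln 2 in the log below, which is what an all-zero initialization gives) is:

## binary cross-entropy loss (sketch; not part of the excerpt above)
def loss(yhat, y):
    m = y.size()[1]
    return -(1 / m) * torch.sum(y * torch.log(yhat) + (1 - y) * torch.log(1 - yhat))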
## hyperparams
costs = []
dim = x_flatten.shape[0]
learning_rate = torch.scalar_tensor(0.0001).to(device)
num_iterations = 100
lrmodel = LR(dim, learning_rate)
lrmodel.to(device)
## transform the data
def transform_data(x, y):
    ## flatten the image batch and reshape the labels, as above
    x_flatten = x.T
    y = y.unsqueeze(0)
    return x_flatten, y
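The training loop itself is not included in this excerpt. Given the hyperparameters above and the log further below, it would look roughly like the following sketch; test_dataset (built like train_dataset, from the val/ folder) and the predict helper are assumptions:

## sketch of the training loop behind the log below (test_dataset and predict are assumptions)
def predict(yhat, y):
    # threshold the probabilities at 0.5 and report accuracy in percent
    y_pred = (yhat >= 0.5).float()
    return (100 - torch.mean(torch.abs(y_pred - y.float())) * 100).item()

x, y = transform_data(*next(iter(train_dataset)))
test_x, test_y = transform_data(*next(iter(test_dataset)))

for i in range(num_iterations):
    # forward pass, cost, gradients, and gradient-descent update
    yhat = lrmodel.forward(x.to(device))
    cost = loss(yhat, y.to(device))
    lrmodel.backward(x.to(device), yhat, y.to(device))
    lrmodel.optimize()

    # record the cost and report train/test accuracy every 10 iterations
    if i % 10 == 0:
        costs.append(cost.item())
        test_yhat = lrmodel.forward(test_x.to(device))
        print("Cost after iteration {}: {} | Train Acc: {} | Test Acc: {}".format(
            i, cost.item(), predict(yhat, y.to(device)), predict(test_yhat, test_y.to(device))))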
## the trend in the context of loss
plt.plot(costs)
plt.show()
Cost after iteration 0: 0.6931470036506653 | Train Acc: 50.40983581542969 | Test Acc: 45.75163269042969
Cost after iteration 10: 0.6691471934318542 | Train Acc: 64.3442611694336 | Test Acc: 54.24836730957031
Cost after iteration 20: 0.6513187885284424 | Train Acc: 68.44261932373047 | Test Acc: 54.24836730957031
Cost after iteration 30: 0.6367831230163574 | Train Acc: 68.03278350830078 | Test Acc: 54.24836730957031
Cost after iteration 40: 0.6245343685150146 | Train Acc: 69.67213439941406 | Test Acc: 54.90196228027344
Cost after iteration 50: 0.6139233112335205 | Train Acc: 70.90164184570312 | Test Acc: 56.20914840698242
Cost after iteration 60: 0.6045243740081787 | Train Acc: 72.54098510742188 | Test Acc: 56.86274337768555
Cost after iteration 70: 0.5960519909858704 | Train Acc: 74.18032836914062 | Test Acc: 57.51633834838867
Cost after iteration 80: 0.5883094668388367 | Train Acc: 73.77049255371094 | Test Acc: 57.51633834838867
Cost after iteration 90: 0.581156849861145 | Train Acc: 74.59016418457031 | Test
Applied Deep Learning for NLP Applications
Natural language processing (NLP) has become an important field, drawing interest from many sectors that leverage modern deep learning methods to approach NLP problems and tasks such as text summarization, question answering, and sentiment classification, to name a few. In this tutorial, we will introduce several fundamental NLP techniques along with more modern approaches (BERT, GPT-2, etc.) and show how they can be applied via transfer learning to many real-world NLP problems. We will focus on how to build an NLP pipeline using several open-source tools, such as Transformers, Tokenizers, spaCy, TensorFlow, and PyTorch, among others. Then we will learn how to use the NLP model to search over documents based on semantic relationships. We will use open-source technologies such as BERT and Elasticsearch for this segment to build a proof of concept. In essence, the learner will take away the important theoretical pieces needed to build these kinds of NLP applications.
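As a rough illustration of the semantic-search idea described above (this is not the tutorial's actual code; the index name, mapping, pooling strategy, and example documents are all assumptions), documents can be embedded with a BERT model from Transformers and ranked in Elasticsearch with a dense_vector field:

## sketch: BERT embeddings + Elasticsearch semantic search (all names and parameters are assumptions)
import torch
from transformers import AutoTokenizer, AutoModel
from elasticsearch import Elasticsearch  # elasticsearch-py 8.x style calls below

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

def embed(text):
    ## mean-pool the last hidden states into a single 768-dim vector
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
    with torch.no_grad():
        output = model(**inputs)
    return output.last_hidden_state.mean(dim=1).squeeze(0).tolist()

es = Elasticsearch("http://localhost:9200")
es.indices.create(index="docs", mappings={
    "properties": {
        "text": {"type": "text"},
        "vector": {"type": "dense_vector", "dims": 768}
    }
})

for doc in ["bees and ants are insects", "transfer learning with BERT"]:
    es.index(index="docs", document={"text": doc, "vector": embed(doc)})
es.indices.refresh(index="docs")

## rank documents by cosine similarity between the query embedding and the stored vectors
results = es.search(index="docs", query={
    "script_score": {
        "query": {"match_all": {}},
        "script": {
            "source": "cosineSimilarity(params.qv, 'vector') + 1.0",
            "params": {"qv": embed("which insects are related?")}
        }
    }
})
print([hit["_source"]["text"] for hit in results["hits"]["hits"]])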