This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import bz2 | |
| import numpy as np | |
| import pickle | |
| from django.conf import settings | |
| from django_redis import get_redis_connection | |
| from gensim.models.keyedvectors import KeyedVectors | |
| from .constants import GOOGLE_WORD2VEC_MODEL_NAME | |
| from .redis import load_word2vec_model_into_redis, query_redis |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_highlighted_tokens(tokens, matrix, model, layer_name, threshold, y_labels):
    """Build a list of tokens in which selected tokens are wrapped in coloured markup.

    Token positions are looked up in the result of ``get_token_indices``.
    A token whose position is present there is coloured with ``color`` and
    wrapped with ``cstr``; every other token is appended unchanged.

    Args:
        tokens: Iterable of tokens; positions are taken from ``enumerate``.
        matrix: Forwarded unchanged to ``get_token_indices``.
        model: Forwarded unchanged to ``get_token_indices``.
        layer_name: Forwarded unchanged to ``get_token_indices``.
        threshold: Forwarded to ``get_token_indices`` and to ``color``.
        y_labels: Forwarded unchanged to ``get_token_indices``.
    """
    # Used below as a mapping: token position -> value handed to ``color``.
    indices = get_token_indices(model, layer_name, threshold, matrix, y_labels)
    ctokens = []
    for i, t in enumerate(tokens):
        if i in indices.keys():
            # Pick the colour from the value stored for this position.
            _color = color(indices[i], threshold=threshold)
            ctokens.append(cstr(t, color=_color))
        else:
            # Position not selected: keep the token as-is.
            ctokens.append(t)
    # NOTE(review): no return statement is visible in this excerpt, so
    # ``ctokens`` is built but not handed back here. The listing may be
    # truncated -- confirm against the full source.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def get_token_indices(model, layer_name, threshold, matrix, y_labels): | |
| heatmap = get_heatmap(model=model, layer_name=layer_name, matrix=matrix, y_labels=y_labels) | |
| _, output_dim = get_conv_layer(model, layer_name) | |
| # depending on the ratio between the input and layer output shape, we need to calculate | |
| # how many original tokens have contributed to the layer output | |
| dim_ratio = matrix.shape[1] / output_dim | |
| if dim_ratio < 1.5: | |
| window_size = 1 | |
| else: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def color(hvalue, threshold, max=1, cdefault='black', colors=('red', 'yellow', 'green', 'cyan', 'blue')):
    """Map a heat value to a colour name.

    The interval from ``threshold`` up to ``max`` is split into
    ``len(colors)`` equally wide bands. ``colors[0]`` belongs to the band
    closest to ``max`` and ``colors[-1]`` to the band closest to
    ``threshold``.

    Args:
        hvalue: The value to classify.
        threshold: Values strictly below this get ``cdefault``.
        max: Upper end of the colour scale. (The name shadows the builtin
            but is kept because it is part of the public signature.)
        cdefault: Colour returned for values below ``threshold`` or when
            ``colors`` is empty.
        colors: Sequence of colour names, ordered from highest band to
            lowest band.

    Returns:
        One entry of ``colors``, or ``cdefault``.
    """
    num_colors = len(colors)
    if hvalue < threshold or num_colors == 0:
        return cdefault

    band_width = (max - threshold) / num_colors
    for band, name in enumerate(colors, start=1):
        if hvalue > max - band_width * band:
            return name

    # Reached when hvalue sits exactly on the threshold (the comparisons
    # above are strict) or when float rounding puts the lowest band edge a
    # hair above the threshold. Such values are not below the threshold, so
    # they belong to the lowest band instead of falling through to None.
    return colors[-1]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def cstr(s, color='black'):
    """Wrap ``s`` in a ``<text>`` element whose inline style sets the colour.

    Args:
        s: The content to wrap; it is inserted as-is (no escaping).
        color: Value placed after ``color:`` in the style attribute.

    Returns:
        The markup string.
    """
    return f"<text style=\"color:{color}\">{s}</text>"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plot_heatmap(heatmap, height_ratio=0.05):
    """Show a heatmap vector as a strip using ``plt.matshow``.

    The vector is stacked on top of itself so that the strip's height is
    roughly ``height_ratio`` times its length.

    Args:
        heatmap: Object exposing ``shape``; ``shape[0]`` is taken as the
            vector length.
        height_ratio: Desired height of the strip relative to its length.
    """
    # The truncating int() yields 0 for short vectors (fewer than
    # 1 / height_ratio entries), which would pass an empty list to matshow.
    # Always draw at least one row.
    repeat_vector_n_times = max(1, int(heatmap.shape[0] * height_ratio))
    plt.matshow([heatmap] * repeat_vector_n_times)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def norm_heatmap(heatmap):
    """Clip negative values to zero and scale the result into ``[0, 1]``.

    Args:
        heatmap: Array-like of numbers.

    Returns:
        A NumPy array in which negatives are replaced by 0 and all values
        are divided by the largest remaining value. If the input is empty
        or has no positive value, a zero-filled float array of the same
        shape is returned instead of dividing by zero.
    """
    # Element-wise maximum with 0, i.e. all negative values become zero.
    clipped = np.maximum(heatmap, 0)

    # np.max raises on an empty array, and a peak of 0 would turn the
    # division below into 0/0 (an all-NaN result). Both cases have nothing
    # to scale.
    peak = clipped.max() if clipped.size else 0
    if peak == 0:
        return np.zeros_like(clipped, dtype=float)

    # Scale so that the largest value becomes exactly 1.
    return clipped / peak
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def get_heatmap(model, layer_name, matrix, y_labels): | |
| # obtain probability of the label with the highest certainty | |
| network_output = model.get_output_at(0)[:, np.argmax(y_labels)] | |
| # obtain the output vector and its dimension of the convolutional layer we want to visualize | |
| conv_layer, layer_output_dim = get_conv_layer(model, layer_name) | |
| # Setting up the calculation of the gradients between the output and the conv layer. Will be executed in the iteration step | |
| grads = K.gradients(network_output, conv_layer.output)[0] | |
| # average the gradients across our samples (one sample) and all filters |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_conv_layer(model, layer_name):
    """Look up a layer by name and report the second entry of its output shape.

    Args:
        model: Object providing ``get_layer(name)``.
        layer_name: Name passed to ``model.get_layer``.

    Returns:
        A ``(layer, output_dim)`` tuple, where ``output_dim`` is
        ``layer.output_shape[1]``.
    """
    layer = model.get_layer(layer_name)
    return layer, layer.output_shape[1]
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.