Created
September 6, 2018 02:34
-
-
Save estasney/0e5a78201dd5f5ad62d3960df59ba2c0 to your computer and use it in GitHub Desktop.
Pattern NLP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import numpy as np | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
# Apply seaborn's default plot styling once, when this module is loaded.
sns.set()
def node_type(x):
    """Classify a node value into a numeric category.

    Returns 2 for any ``str``, 1 for a non-string whose ``str()`` form is at
    most two characters long, and 0 for everything else.
    """
    if isinstance(x, str):
        return 2
    # Non-strings are told apart purely by the length of their text form.
    return 1 if len(str(x)) <= 2 else 0
def make_pallete(n):
    """Build ``n`` colours from seaborn's ``'hls'`` palette as integer tuples.

    Every channel value returned by ``sns.color_palette`` is multiplied by
    255 and rounded up with ``math.ceil``.

    Returns:
        A list with one tuple of integer channel values per palette colour.
    """
    unit_colors = sns.color_palette('hls', n)
    return [
        tuple(math.ceil(channel * 255) for channel in color)
        for color in unit_colors
    ]
def rgb2hex(x):
    """Format the first three items of ``x`` as a ``#rrggbb`` string.

    Each channel is written as lowercase hexadecimal, zero-padded to at
    least two digits.
    """
    red, green, blue = x[0], x[1], x[2]
    return '#%02x%02x%02x' % (red, green, blue)
def node_color(x, topic_colors):
    """Pick the display colour for a node value.

    Args:
        x: The node value.
        topic_colors: Lookup indexed by ``x`` for non-string values whose
            ``str()`` form is at most two characters long.

    Returns:
        ``'#33cc33'`` for any ``str``; ``topic_colors[x]`` for short
        non-string values; ``'#3366ff'`` for everything else.
    """
    if isinstance(x, str):
        return '#33cc33'
    if len(str(x)) > 2:
        return '#3366ff'
    return topic_colors[x]
def color_mix(req_id, color_dict, adjacency=None):
    """Blend the colours of a node's neighbours, weighted by edge weight.

    Args:
        req_id: Key of the node whose neighbours are blended.
        color_dict: Mapping from neighbour key to a sequence of numeric
            colour channels.
        adjacency: Optional mapping of the form
            ``node -> {neighbour: {'weight': w}}``. Defaults to the
            module-level ``graph`` so existing two-argument calls keep
            working.

    Returns:
        The neighbour's own ``color_dict`` entry when there is exactly one
        neighbour. Otherwise a tuple of ints holding the weighted average of
        the neighbours' colours, each channel rounded up.

    Raises:
        ZeroDivisionError: If the neighbour weights are plain Python numbers
            that sum to zero (this includes a node with no neighbours).
    """
    if adjacency is None:
        adjacency = graph
    neighbors = list(adjacency[req_id].items())
    if len(neighbors) == 1:
        return color_dict[neighbors[0][0]]

    # Normalise the weights so that they sum to 1.
    scale = 1 / sum(attrs['weight'] for _, attrs in neighbors)

    weighted_colors = []
    for neighbor, attrs in neighbors:
        share = attrs['weight'] * scale
        weighted_colors.append(
            [channel * share for channel in color_dict[neighbor]]
        )

    # The shares already sum to 1, so the blend is the SUM of the weighted
    # colours. Taking the mean would divide by the neighbour count a second
    # time and darken the result.
    blended = np.array(weighted_colors).sum(axis=0)

    # Round away float noise before ceil, so that e.g. 255.00000000000003
    # does not become 256.
    return tuple(math.ceil(round(float(channel), 6)) for channel in blended)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import chain | |
from pattern.en import parsetree | |
def extract(x):
    """Return lowercased lemmas of the 'NN'-tagged words in ``x``.

    The text is parsed with ``pattern.en.parsetree``. Sentences flagged as
    questions or exclamations are skipped. From the remaining sentences,
    words whose tag starts with ``'NN'`` are kept (presumably the Penn
    Treebank noun tags; verify against the Pattern docs). Their lemmas are
    lowercased, and any lemma found in the module-level ``p_selected``
    collection (defined outside this block) is dropped.

    Args:
        x: The raw text to parse.

    Returns:
        A list of lowercase lemma strings, in document order.
    """
    keep_tags = ('NN',)
    tree = parsetree(x, relations=True, lemmata=True)

    # Both conditions must hold for the same sentence. Filtering the tree
    # twice independently would discard the result of the first filter.
    sentences = [
        sentence for sentence in tree
        if sentence.is_question is False and sentence.is_exclamation is False
    ]

    tokens = chain.from_iterable(sentence.words for sentence in sentences)
    # str.startswith accepts a tuple, so one call covers every kept prefix.
    lemmas = [
        str(token.lemma).lower()
        for token in tokens
        if token.tag.startswith(keep_tags)
    ]
    return [lemma for lemma in lemmas if lemma not in p_selected]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment