I hereby claim:
- I am nempickaxe on github.
- I am ilaichi (https://keybase.io/ilaichi) on keybase.
- I have a public key ASC0peYsZX_Z7LwCfPjY9FJz_772TLP9XsoLON6QsTED-go
To claim this, I am signing this object:
| import re | |
| import yaml | |
| def parse_config(vars_dict, path=None, data=None, tag='!ENV'): | |
| """ | |
| Load a yaml configuration file and resolve any environment variables | |
| The environment variables must have !ENV before them and be in this format | |
| to be parsed: $<VAR_NAME>. | |
| E.g.: | |
| database: |
| import re | |
| import nltk | |
| import emoji | |
| from nltk.tokenize import word_tokenize | |
| def tokenize(corpus): | |
| data = re.sub(r'[,!?;-]+', '.', corpus) | |
| data = nltk.word_tokenize(data) # tokenize string to words | |
| data = [ ch.lower() for ch in data | |
| if ch.isalpha() |
| import textwrap | |
| import PIL | |
| from PIL import ImageFont | |
| from PIL import Image | |
| from PIL import ImageDraw | |
| def text2png(text, fullpath, color = "#000", bgcolor = "#FFF", fontfullpath = None, fontsize = 13, leftpadding = 3, rightpadding = 3, width = 2000): | |
| REPLACEMENT_CHARACTER = '\uFFFD' | |
| NEWLINE_REPLACEMENT_STRING = ' ' + REPLACEMENT_CHARACTER + ' ' |
| def get_lower_tri_heatmap(df, output="cooc_matrix.png"): | |
| mask = np.zeros_like(df, dtype=np.bool) | |
| mask[np.triu_indices_from(mask)] = True | |
| # Want diagonal elements as well | |
| mask[np.diag_indices_from(mask)] = False | |
| # Set up the matplotlib figure | |
| f, ax = plt.subplots(figsize=(11, 9)) |
I hereby claim:
To claim this, I am signing this object:
| import nltk | |
| from nltk.tokenize import WordPunctTokenizer | |
| from nltk.collocations import BigramCollocationFinder | |
| from nltk.metrics import BigramAssocMeasures | |
| from nltk.corpus import stopwords | |
| nltk.download('stopwords') | |
| from nltk.collocations import TrigramCollocationFinder | |
| from nltk.metrics import TrigramAssocMeasures | |
| from collections import Counter |
| import dbm, os | |
| import cPickle as pickle | |
| from gensim.models import Word2Vec | |
| import numpy as np | |
| def save_model(model, directory): | |
| model.init_sims() # making sure syn0norm is initialised | |
| if not os.path.exists(directory): | |
| os.makedirs(directory) | |
| # Saving indexes as DBM'ed dictionary |