I hereby claim:
- I am nempickaxe on github.
- I am ilaichi (https://keybase.io/ilaichi) on keybase.
- I have a public key ASC0peYsZX_Z7LwCfPjY9FJz_772TLP9XsoLON6QsTED-go
To claim this, I am signing this object:
import dbm
import os
import pickle  # the original `cPickle` is Python 2 only; `pickle` is the Python 3 equivalent
from gensim.models import Word2Vec
import numpy as np

def save_model(model, directory):
    model.init_sims()  # making sure syn0norm is initialised
    if not os.path.exists(directory):
        os.makedirs(directory)
    # Saving indexes as DBM'ed dictionary
    # (completion of the truncated preview; assumes the gensim 3.x vocab API)
    with dbm.open(os.path.join(directory, 'indexes'), 'n') as db:
        for word, vocab_item in model.wv.vocab.items():
            db[word] = pickle.dumps(vocab_item)
    # Persist the normalised vectors alongside the indexes
    np.save(os.path.join(directory, 'syn0norm.npy'), model.wv.vectors_norm)
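For reference, a quick usage sketch; the toy corpus and target directory here are made up:

from gensim.models import Word2Vec

sentences = [["hello", "world"], ["word", "embeddings"]]  # toy corpus
model = Word2Vec(sentences, min_count=1)
save_model(model, "w2v_model")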
import nltk
from nltk.tokenize import WordPunctTokenizer
from nltk.collocations import BigramCollocationFinder, TrigramCollocationFinder
from nltk.metrics import BigramAssocMeasures, TrigramAssocMeasures
from nltk.corpus import stopwords
from collections import Counter

nltk.download('stopwords')  # fetch the stopword list on first run
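A minimal sketch of how these imports typically fit together, on a made-up sentence; the frequency threshold is arbitrary:

words = [w.lower() for w in WordPunctTokenizer().tokenize(
    "new york is a big city and new york never sleeps")]
finder = BigramCollocationFinder.from_words(words)
finder.apply_freq_filter(2)                       # keep bigrams seen at least twice
finder.apply_word_filter(lambda w: w in stopwords.words('english'))
print(finder.nbest(BigramAssocMeasures.pmi, 5))   # e.g. [('new', 'york')]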
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def get_lower_tri_heatmap(df, output="cooc_matrix.png"):
    # np.bool was removed from recent NumPy releases; plain bool is equivalent
    mask = np.zeros_like(df, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    # Want diagonal elements as well
    mask[np.diag_indices_from(mask)] = False
    # Set up the matplotlib figure
    f, ax = plt.subplots(figsize=(11, 9))
    # Draw and save the masked heatmap (this ending is assumed; the preview
    # stops at the subplots call, but the masking idiom is seaborn's)
    sns.heatmap(df, mask=mask, square=True, ax=ax)
    f.savefig(output)
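A quick way to exercise it, using a small symmetric co-occurrence matrix with made-up counts:

import pandas as pd

labels = ["cat", "dog", "fish"]
cooc = pd.DataFrame([[5, 2, 1], [2, 4, 0], [1, 0, 3]],
                    index=labels, columns=labels)
get_lower_tri_heatmap(cooc, output="demo_cooc.png")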
import textwrap
from PIL import Image, ImageDraw, ImageFont

def text2png(text, fullpath, color="#000", bgcolor="#FFF", fontfullpath=None,
             fontsize=13, leftpadding=3, rightpadding=3, width=2000):
    # Newlines cannot be drawn directly, so they are swapped for a sentinel
    # character first and handled during line wrapping
    REPLACEMENT_CHARACTER = '\uFFFD'
    NEWLINE_REPLACEMENT_STRING = ' ' + REPLACEMENT_CHARACTER + ' '
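The preview ends before any drawing happens. For orientation, a minimal sketch of the core PIL calls a text-to-PNG helper like this builds on; the sizes, coordinates, and text are placeholders, and the real function presumably wraps lines with textwrap to honour `width`:

from PIL import Image, ImageDraw, ImageFont

font = ImageFont.load_default()
img = Image.new("RGB", (200, 40), "#FFF")   # width x height, background colour
draw = ImageDraw.Draw(img)
draw.text((3, 10), "hello world", fill="#000", font=font)
img.save("hello.png")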
import re
import nltk
import emoji
from nltk.tokenize import word_tokenize

def tokenize(corpus):
    data = re.sub(r'[,!?;-]+', '.', corpus)  # collapse punctuation runs into '.'
    data = nltk.word_tokenize(data)          # tokenize string to words
    data = [ch.lower() for ch in data
            if ch.isalpha()
            or ch == '.'
            # keep emoji as tokens; emoji.is_emoji needs emoji>=2.0
            # (the truncated original may have used the older get_emoji_regexp())
            or emoji.is_emoji(ch)]
    return data
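Roughly what the pipeline produces (nltk's 'punkt' tokenizer data must be downloaded first):

# nltk.download('punkt')
print(tokenize("Who loves word embeddings? I do!"))
# -> ['who', 'loves', 'word', 'embeddings', '.', 'i', 'do', '.']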
import re
import yaml

def parse_config(vars_dict, path=None, data=None, tag='!ENV'):
    """
    Load a yaml configuration file and resolve any environment variables.
    The environment variables must have !ENV before them and be in this
    format to be parsed: ${VAR_NAME}.
    E.g.:
    database:
        host: !ENV ${DB_HOST}
    """
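The preview stops inside the docstring, so the body above is left empty and the `host:` example line is an assumed completion. Below is a minimal sketch of one way to implement the described behaviour with PyYAML's resolver/constructor hooks; this wiring is an assumption, not the gist's actual body:

import os
import re
import yaml

pattern = re.compile(r'\$\{([^}]+)\}')
yaml.add_implicit_resolver('!ENV', pattern, Loader=yaml.SafeLoader)

def env_constructor(loader, node):
    value = loader.construct_scalar(node)
    # Substitute each ${VAR} with its environment value, leaving unknowns intact
    return pattern.sub(lambda m: os.environ.get(m.group(1), m.group(0)), value)

yaml.add_constructor('!ENV', env_constructor, Loader=yaml.SafeLoader)

os.environ['DB_HOST'] = 'localhost'
print(yaml.safe_load("host: !ENV ${DB_HOST}"))   # {'host': 'localhost'}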
def get_interval(space_list, width):
    # Return the last breakpoint that still fits within `width`
    # (assumes space_list is sorted in ascending order)
    for i in range(len(space_list) - 1):
        if space_list[i + 1] > width:
            return space_list[i]
    return space_list[-1]

def get_subtracted_list(space_list, width):
    # Elementwise max(x - width, 0): (d + |d|) / 2 is d when positive, else 0
    return list(map(lambda x: int(((x - width) + abs(x - width)) / 2), space_list))
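Worked examples of both helpers, with made-up breakpoints (get_interval expects an ascending list):

spaces = [10, 20, 30, 40]
print(get_interval(spaces, 25))          # 20 -- the largest breakpoint not past 25
print(get_subtracted_list(spaces, 25))   # [0, 0, 5, 15] -- negatives clamp to 0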
import sys
from types import ModuleType, FunctionType
from gc import get_referents

# Custom objects know their class.
# Function objects seem to know way too much, including modules.
# Exclude modules as well.
BLACKLIST = type, ModuleType, FunctionType
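The preview ends before the function that uses BLACKLIST. A sketch of the usual companion: a getsize() that walks gc referents breadth-first while skipping blacklisted types. This completion follows the comments above but is an assumption, not the gist's visible code:

def getsize(obj):
    # Sum sys.getsizeof over the object and everything it references
    if isinstance(obj, BLACKLIST):
        raise TypeError('getsize() does not take argument of type: ' + str(type(obj)))
    seen_ids = set()
    size = 0
    objects = [obj]
    while objects:
        need_referents = []
        for o in objects:
            if not isinstance(o, BLACKLIST) and id(o) not in seen_ids:
                seen_ids.add(id(o))
                size += sys.getsizeof(o)
                need_referents.append(o)
        objects = get_referents(*need_referents)
    return size

print(getsize({'a': [1, 2, 3]}))   # total bytes for the dict, its keys, the list, and the ints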
def read_mongo_collection(uri, pipeline=None, given_schema=None, spark=None):
    """
    :param uri: uri for mongo connection
    :param pipeline: pipeline option for pushing queries to mongo
    :param given_schema: schema option, will read in mentioned schema
    :param spark: active SparkSession to read through
    :return: dataframe after reading from mongo
    """
    # The preview cuts off mid-call; the branches below fold the original
    # nested ifs into one builder chain, ending with the standard
    # .option("uri", ...).load() of the mongo-spark connector
    reader = spark.read.format("com.mongodb.spark.sql.DefaultSource").option("uri", uri)
    if pipeline:
        reader = reader.option("pipeline", pipeline)
    if given_schema:
        reader = reader.schema(given_schema)
    return reader.load()
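A hypothetical call site; the URI, database, collection, and connector version are placeholders:

from pyspark.sql import SparkSession

spark = (SparkSession.builder
         .appName("mongo-read")
         .config("spark.jars.packages",
                 "org.mongodb.spark:mongo-spark-connector_2.12:3.0.1")
         .getOrCreate())

df = read_mongo_collection(
    "mongodb://localhost:27017/mydb.mycoll",
    pipeline='[{"$match": {"status": "active"}}]',
    spark=spark,
)
df.show()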