estasney’s gists

estasney / index.html

Created November 25, 2019 17:03

D3 Collapsible Tidy Tree (v5)

	<!DOCTYPE html>
	<html lang="en" dir="ltr">
	<head>
	<meta charset="utf-8">
	<title></title>
	<script src="https://d3js.org/d3.v5.js"></script>
	<script type="text/javascript" src="d3-hierarchy.js"></script>
	<style>
	.link {

estasney / macro

Created September 30, 2019 16:45

Copy from Excel to Word

	Sub Macro1()
	'
	' Macro1 Macro
	'
	' Keyboard Shortcut: Ctrl+w
	'
	Dim objWord As Word.Application
	Dim wordDoc As Word.Document

	Application.ScreenUpdating = False

estasney / combine spreadsheets

Created July 29, 2019 19:14

	import pandas as pd
	import os

	SPREADSHEET_FOLDER = r"C:\Users" # Which folder are these files located in?
	OUTPUT_FILEPATH = r"C:\Users" # Where should it go?

	if not os.path.isdir(SPREADSHEET_FOLDER):
	raise NotADirectoryError
	if '.xls' not in OUTPUT_FILEPATH and '.csv' not in OUTPUT_FILEPATH:
	raise Exception("Output path must have ext of .csv, .xls, or .xlsx")

estasney / backoff.py

Last active January 25, 2021 15:55

Backoff Decorator - Accepting Parameters

	import time
	import random

	class BackOffDecorator(object):

	def __init__(self, max_tries, delay, backoff_rate):
	self.max_tries = max_tries
	self.tries = 0
	self.delay = delay
	self.backoff_rate = backoff_rate

estasney / WordMap.py

Last active October 15, 2018 02:16

WordMappings

	from cytoolz import groupby

	class WordPair(object):

	PREFERRED = 'preferred'
	OTHERS = 'others'

	def __init__(self, preferred, others):
	self.preferred = preferred
	if isinstance(others, list):

estasney / manifest.json

Created September 9, 2018 18:20 — forked from siumeiman/manifest.json

Download LinkedIn

estasney / coloring.py

Created September 6, 2018 02:34

Pattern NLP

	import math
	import numpy as np
	import seaborn as sns
	import matplotlib.pyplot as plt

	sns.set()

	def node_type(x):
	if isinstance(x, str):
	return 2

estasney / extract_xml.py

Created July 13, 2018 01:46

Parsing XML of Stack Overflow Data Dumps

	from gensim.utils import smart_open
	from collections import defaultdict, OrderedDict
	import csv
	import xml.etree.ElementTree as ET

	headers = ['AcceptedAnswerId', 'AnswerCount', 'ClosedDate', 'CommentCount', 'CommunityOwnedDate', 'CreationDate',
	'FavoriteCount', 'Id', 'LastActivityDate', 'LastEditDate', 'LastEditorDisplayName', 'LastEditorUserId',
	'OwnerDisplayName', 'OwnerUserId', 'ParentId', 'PostTypeId', 'Score', 'Tags', 'Title', 'ViewCount']

	file_path = ""

estasney / analyze_tags.py

Created June 29, 2018 01:47

Co-occurrence of StackOverflow Tags

	# Given a StackOverflow tag, how often does it occur with other tags?
	# See example query https://data.stackexchange.com/stackoverflow/query/868423/co-occurrence-of-tags
	# Download CSV results

	import pandas as pd
	import re
	from sklearn.feature_extraction.text import CountVectorizer

	CSV_PATH = ""
	TAG = ""

estasney / name_search.py

Last active August 15, 2018 16:52

Performance-oriented string search across multiple datasets

	from collections import OrderedDict
	from operator import itemgetter

	class NameData(object):

	def __init__(self, data, name, priority, preprocessor=None):
	self.data = self.structure_data(data)
	self.name_set = self.generate_set(data)
	self.name = name
	self.priority = priority