Skip to content

Instantly share code, notes, and snippets.

@estasney
estasney / index.html
Created November 25, 2019 17:03
D3 Collapsible Tidy Tree (v5)
<html>
</<!DOCTYPE html>
<html lang="en" dir="ltr">
<head>
<meta charset="utf-8">
<title></title>
<script src="https://d3js.org/d3.v5.js"></script>
<script type="text/javascript" src="d3-hierarchy.js"></script>
<style>
.link {
@estasney
estasney / macro
Created September 30, 2019 16:45
Copy from Excel to Word
Sub Macro1()
'
' Macro1 Macro
'
' Keyboard Shortcut: Ctrl+w
'
Dim objWord As Word.Application
Dim wordDoc As Word.Document
Application.ScreenUpdating = False
import pandas as pd
import os
SPREADSHEET_FOLDER = r"C:\Users" # Which folder are these files located in?
OUTPUT_FILEPATH = r"C:\Users" # Where should it go?
if not os.path.isdir(SPREADSHEET_FOLDER):
raise NotADirectoryError
if '.xls' not in OUTPUT_FILEPATH and '.csv' not in OUTPUT_FILEPATH:
raise Exception("Output path must have ext of .csv, .xls, or .xlsx")
@estasney
estasney / backoff.py
Last active January 25, 2021 15:55
Backoff Decorator - Accepting Parameters
import time
import random
class BackOffDecorator(object):
def __init__(self, max_tries, delay, backoff_rate):
self.max_tries = max_tries
self.tries = 0
self.delay = delay
self.backoff_rate = backoff_rate
@estasney
estasney / WordMap.py
Last active October 15, 2018 02:16
WordMappings
from cytoolz import groupby
class WordPair(object):
PREFERRED = 'preferred'
OTHERS = 'others'
def __init__(self, preferred, others):
self.preferred = preferred
if isinstance(others, list):
@estasney
estasney / manifest.json
Created September 9, 2018 18:20 — forked from siumeiman/manifest.json
Download LinkedIn
{
  "manifest_version": 2,
  "name": "LinkedIn Profile Saver",
  "version": "1.0.0",
  "content_scripts": [{
    "matches": [
      "http://*.linkedin.com/in/*",
      "https://*.linkedin.com/in/*",
      "http://*.linkedin.com/profile/*",
      "https://*.linkedin.com/profile/*"
@estasney
estasney / coloring.py
Created September 6, 2018 02:34
Pattern NLP
import math
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set()
def node_type(x):
if isinstance(x, str):
return 2
@estasney
estasney / extract_xml.py
Created July 13, 2018 01:46
Parsing XML of Stack Overflow Data Dumps
from gensim.utils import smart_open
from collections import defaultdict, OrderedDict
import csv
import xml.etree.ElementTree as ET
headers = ['AcceptedAnswerId', 'AnswerCount', 'ClosedDate', 'CommentCount', 'CommunityOwnedDate', 'CreationDate',
'FavoriteCount', 'Id', 'LastActivityDate', 'LastEditDate', 'LastEditorDisplayName', 'LastEditorUserId',
'OwnerDisplayName', 'OwnerUserId', 'ParentId', 'PostTypeId', 'Score', 'Tags', 'Title', 'ViewCount']
file_path = ""
@estasney
estasney / analyze_tags.py
Created June 29, 2018 01:47
Co-occurrence of StackOverflow Tags
# Given a StackOverflow tag, how often does it occur with other tags?
# See example query https://data.stackexchange.com/stackoverflow/query/868423/co-occurrence-of-tags
# Download CSV results
import pandas as pd
import re
from sklearn.feature_extraction.text import CountVectorizer
CSV_PATH = ""
TAG = ""
@estasney
estasney / name_search.py
Last active August 15, 2018 16:52
Performance oriented string search across multiple datasets
from collections import OrderedDict
from operator import itemgetter
class NameData(object):
def __init__(self, data, name, priority, preprocessor=None):
self.data = self.structure_data(data)
self.name_set = self.generate_set(data)
self.name = name
self.priority = priority