This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple | |
from datetime import datetime | |
date_pattern = "%Y-%m-%dT%H:%M:%S.%fZ" | |
Point = namedtuple("Point", ("x", "y")) | |
def serialize_datetime(nt): | |
assert hasattr(nt, '_asdict') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pdfplumber | |
import itertools, collections, sys, os, re, json | |
from pprint import pprint as pr | |
from copy import deepcopy | |
from operator import itemgetter as at | |
class CartesianText: | |
__slots__ = ["text", "x0", "x1", "y0", "y1", "page_height"] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections | |
import wikipedia | |
from bs4 import BeautifulSoup | |
def infobox(wiki_page): | |
"""Returns the infobox of a given wikipedia page""" | |
if isinstance(wiki_page, str): | |
wiki_page = wikipedia.page(wiki_page) | |
try: | |
soup = BeautifulSoup(wiki_page.html()).find_all("table", {"class": "infobox"})[0] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import collections, itertools, string | |
from scipy.cluster import hierarchy | |
from scipy.spatial import distance | |
from sklearn.feature_extraction import text | |
from editdistance import distance as editdistance | |
def edit_pdist(toks, normalize=False): | |
"""Return pairwise editdistance matrix""" | |
n = len(toks) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A python wrapper for the icount.co.il api | |
https://www.icount.co.il/api-v3/ | |
""" | |
import json | |
from urllib import request, parse | |
def post(url, data): | |
req = request.Request(url, data=parse.urlencode(data).encode()) |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from html import unescape | |
def html2text(htm): | |
ret = unescape(htm) | |
ret = ret.translate({ | |
8209: ord('-'), | |
ord('`'): ord("'"), | |
ord('’'): ord("'"), | |
8220: ord('"'), | |
8221: ord('"'), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json, sys | |
from operator import itemgetter as at | |
# Convert a flat JSON object ({key: value, ...}) into a two-column CSV file
# written next to the input, with rows ordered by value, largest first.
#
# Usage: python <script> <file.json>   ->   writes <file>.csv

# Validate the command line explicitly: `assert` is stripped under `python -O`
# and a missing argument would otherwise surface as a bare IndexError.
if len(sys.argv) < 2 or not sys.argv[1].endswith('.json'):
    sys.exit('usage: {prog} <file.json>'.format(prog=sys.argv[0]))
fname = sys.argv[1]

# Swap only the trailing extension. str.replace('.json', '.csv') would also
# rewrite any earlier ".json" in the path (e.g. a directory "dump.json.d/").
out_name = fname[:-len('.json')] + '.csv'

# JSON text is UTF-8; do not depend on the locale's default encoding.
with open(fname, 'r', encoding='utf-8') as f:
    d = json.load(f)

with open(out_name, 'w', encoding='utf-8') as f:
    # Header row.
    f.write('{k},{v}\n'.format(k="key", v="val"))
    # Sort by the value (item index 1), descending.
    for k, v in sorted(d.items(), key=at(1), reverse=True):
        # Keys are always quoted; embedded double quotes are doubled per the
        # usual CSV escaping convention. Values are written verbatim.
        f.write('"{k}",{v}\n'.format(k=k.replace('"', '""'), v=v))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
categorical_data = [ | |
(0,1,2), | |
(0,1), | |
(0,1,3), | |
(0,1,3), | |
(0,1,3), | |
(0,1,2,3), | |
(2, 3), | |
(2, 3), | |
(2, 3), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys, os, json, subprocess | |
from argparse import ArgumentParser | |
__dir__ = os.path.dirname(os.path.abspath(__file__)) | |
def shell(cmd): | |
"""Run bash command""" | |
process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
stdout, stderr = process.communicate() |