This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections | |
import wikipedia | |
from bs4 import BeautifulSoup | |
def infobox(wiki_page): | |
"""Returns the infobox of a given wikipedia page""" | |
if isinstance(wiki_page, str): | |
wiki_page = wikipedia.page(wiki_page) | |
try: | |
soup = BeautifulSoup(wiki_page.html()).find_all("table", {"class": "infobox"})[0] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pdfplumber | |
import itertools, collections, sys, os, re, json | |
from pprint import pprint as pr | |
from copy import deepcopy | |
from operator import itemgetter as at | |
class CartesianText: | |
__slots__ = ["text", "x0", "x1", "y0", "y1", "page_height"] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple | |
from datetime import datetime | |
date_pattern = "%Y-%m-%dT%H:%M:%S.%fZ" | |
Point = namedtuple("Point", ("x", "y")) | |
def serialize_datetime(nt): | |
assert hasattr(nt, '_asdict') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys, os | |
import streamlit as st | |
def file2page_name(fname):
    """Derive a display name for a page from its script file name.

    A trailing ``.py`` extension is removed, everything up to and including
    the first underscore is dropped (e.g. an ordering prefix such as ``01_``),
    and the remainder is title-cased with ``str.title``.

    Args:
        fname: File name of the page script, e.g. ``"01_home.py"``.

    Returns:
        The title-cased page name, e.g. ``"Home"``. When the name contains no
        underscore the whole stem is used instead of raising ``IndexError``.
    """
    # Strip the extension only at the end of the name; a blanket
    # ``replace('.py', '')`` would also eat ".py" inside the stem.
    stem = fname[:-3] if fname.endswith('.py') else fname
    _prefix, sep, label = stem.partition("_")
    # No underscore means there is no prefix to drop: fall back to the stem.
    return (label if sep else stem).title()
sys.path.append("..") | |
page_files = dict() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Follow symbolic links only when the link and its target share the same owner.
Options +SymLinksIfOwnerMatch | |
# Enable mod_rewrite processing for this directory.
RewriteEngine on | |
# Apply the rule below only when the request does not map to an existing file ...
RewriteCond %{REQUEST_FILENAME} !-f | |
# ... and does not map to an existing directory.
RewriteCond %{REQUEST_FILENAME} !-d | |
# Internally rewrite every path that does not start with "index.php" to
# /index.php, passing the requested path in the "py" query parameter.
# Flags: NC = case-insensitive match, L = stop processing further rules,
# QSA = append the original query string to the rewritten URL.
RewriteRule ^((?!index\.php).+)$ /index.php?py=$1 [NC,L,QSA] |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os, sys, json | |
from pathlib import Path | |
class ConfigReader: | |
def __init__(self, default=None, **kwargs): | |
self.default=default | |
self.py_file = Path(os.path.join(os.getcwd(), sys.argv[0])).absolute() | |
p = self.py_file.parent | |
found_config_json = [] | |
while p!=Path('/'): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from copy import deepcopy as clone | |
from sklearn.base import ClassifierMixin | |
from sklearn.pipeline import Pipeline | |
class ConditionedTextClassifier(ClassifierMixin): | |
def __init__(self, conditions, model, condition_sep=' <s> '): | |
self.condition_sep=condition_sep | |
self.conditions = {} | |
for c in conditions: | |
self.conditions[c] = clone(model) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pathlib import Path | |
import subprocess, sys | |
def bgprocess(p: Path, *args):
    """Launch a Python script as a detached background process.

    The script is run with the interpreter executing the current program
    (``sys.executable``), with the script's own directory as the working
    directory, and with stdout and stderr discarded.

    Args:
        p: Path to the script, as a ``Path`` or anything ``Path()`` accepts.
            Relative paths are resolved against the current working directory.
        *args: Extra command-line arguments for the script. Values that are
            not ``str``, ``bytes`` or path-like (e.g. ints) are converted with
            ``str()`` so they do not make ``Popen`` raise ``TypeError``.

    Returns:
        The ``subprocess.Popen`` handle of the started process; the caller is
        responsible for waiting on or terminating it.
    """
    import os  # local import: only needed for the PathLike check below

    script = Path(p).absolute()
    # The script is addressed by bare name because cwd is set to its folder.
    command = [sys.executable, script.name]
    command.extend(
        a if isinstance(a, (str, bytes, os.PathLike)) else str(a)
        for a in args
    )
    return subprocess.Popen(
        command,
        cwd=str(script.parent),
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
from itertools import product | |
from scipy import sparse | |
from sklearn.base import TransformerMixin | |
class InteractionBySplit(TransformerMixin): | |
""" | |
Takes a sparse matrix as input, and an index to split by, and returns all possible interactions before and after that index. | |
""" | |
def __init__(self, split_index,*args,**kwargs): |