Skip to content

Instantly share code, notes, and snippets.

View ortsed's full-sized avatar

Llewellyn Jones ortsed

View GitHub Profile
@ortsed
ortsed / recursive_cte.sql
Created May 2, 2024 12:07
Recursive CTE SQL
WITH RECURSIVE NetworkCTE AS (
-- Anchor part to select the starting node
SELECT source_node,
target_node
FROM network_connections
WHERE source_node = 'Paolo' -- Change this to get someone else's network
UNION ALL
-- Recursive part to select connected nodes
@ortsed
ortsed / bayesian_likelihood.py
Created April 25, 2024 20:29
Bayesian Likelihood For Pandas
def bayesian_liklihood(df, target_col, target_val, prior_col):
"""
Gets the likelihood of a target value given a prior value using the Bayesian formula
on a Pandas dataframe
df: Pandas dataframe
target_col: column being predicted
target_val: value in the target_col being predicted
@ortsed
ortsed / xicor.py
Created April 8, 2024 17:31
Xicor Correlation
## Python Function ##
from numpy import array, random, arange
def xicor(X, Y, ties=True):
random.seed(42)
n = len(X)
order = array([i[0] for i in sorted(enumerate(X), key=lambda x: x[1])])
if ties:
l = array([sum(y >= Y[order]) for y in Y[order]])
r = l.copy()
@ortsed
ortsed / logit_adjustment.py
Last active November 15, 2021 18:45
Logit Adjustment for Modeling Probabilities Using Scipy
from scipy.special import loggamma
from scipy.special import expit, logit
import numpy as np
from scipy.optimize import minimize
def logLikelihood(params, y, X):
b = np.array(params[0:-1]) # the beta parameters of the regression model
phi = params[-1] # the phi parameter
mu = expit(np.dot(X,b))
@ortsed
ortsed / gnb_importance.py
Created November 11, 2020 18:00
Gaussian Naive Bayes Importance
import numpy as np
# Lists the ten columns with the smallest per-feature mean, then the ten with
# the smallest per-feature variance, for the first class (row 0) of `model`.
# NOTE(review): assumes `model` is a fitted sklearn GaussianNB and `X_test` is
# a pandas DataFrame with the training columns -- both are defined elsewhere.

# argsort is ascending, so the first ten indices are the lowest means.
neg = model.theta_[0].argsort()
print(np.take(X_test.columns, neg[:10]))
print('')

# scikit-learn 1.0 renamed `sigma_` to `var_` and 1.2 removed `sigma_`;
# prefer the new attribute and fall back so older installs keep working.
_variances = model.var_ if hasattr(model, "var_") else model.sigma_
neg = _variances[0].argsort()
print(np.take(X_test.columns, neg[:10]))
@ortsed
ortsed / usps_time_calc.py
Last active July 24, 2020 15:35
Calculates Delivery Time for Standard Shipping Through USPS
# Extends the USPS API class to calculate the delivery time for Standard shipping from one ZIP code to another
from lxml import etree
def get_time(origin_zip, dest_zip):
usps = USPSApi(USPS_API_USER, test=True)
#usps.urls['calc'] = 'PriorityMail&XML={xml}'
usps.urls['calc'] = 'StandardB&XML={xml}'
@ortsed
ortsed / hurdle.py
Created June 10, 2020 17:01
Sklearn Implementation of Hurdle Model
from typing import Optional, Union
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.base import BaseEstimator
from sklearn.utils.estimator_checks import check_estimator
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from lightgbm import LGBMClassifier, LGBMRegressor
@ortsed
ortsed / mysqldump-to-csv.py
Created January 27, 2020 16:03
Mysqldump-to-csv
#!/usr/bin/env python
import fileinput
import csv
import sys
#https://github.com/jamesmishra/mysqldump-to-csv
# This prevents prematurely closed pipes from raising
# an exception in Python
from signal import signal, SIGPIPE, SIG_DFL
@ortsed
ortsed / karmarkar.py
Created January 22, 2020 20:47
Karmarkar's Algorithm
import numpy as np
class LPSolution(object):
def __init__(self):
self.iterations = None
self.tolerance = None
self.intermediates = []
self.solution = None
self.solution_string = None
@ortsed
ortsed / multi_gaussian.py
Created January 22, 2020 19:07
Multivariate Gaussian
def multivariateGaussian(X, mu, sigma):
    """Evaluate a multivariate Gaussian density at each row of X.

    Parameters
    ----------
    X : array-like, shape (m, k)
        One sample per row, one feature per column.
    mu : array, length k
        Mean of the distribution (must support ``.T`` and ``len``).
    sigma : array-like
        Either a vector of k per-feature variances (1-D, or a 2-D row/column
        vector), which is expanded to a diagonal covariance matrix, or a
        full (k, k) covariance matrix, which is used as given.

    Returns
    -------
    Array of m density values, one per row of X.
    """
    k = len(mu)
    sigma = np.asarray(sigma)
    # Only a variance *vector* is turned into a diagonal matrix. Calling
    # np.diag on a square matrix would instead extract its diagonal and make
    # the determinant below fail, so a full covariance is left untouched.
    if sigma.ndim == 1 or (sigma.ndim == 2 and 1 in sigma.shape):
        sigma = np.diag(sigma.ravel())
    centered = X - mu.T
    # Normalising constant: (2*pi)^(k/2) * |sigma|^(1/2).
    norm = (2 * np.pi) ** (k / 2) * (np.linalg.det(sigma) ** 0.5)
    # Row-wise quadratic form (x - mu)^T sigma^+ (x - mu); the elementwise
    # product followed by a row sum avoids building the full m x m matrix.
    quad = np.sum(centered @ np.linalg.pinv(sigma) * centered, axis=1)
    p = 1 / norm * np.exp(-0.5 * quad)
    return p
# Density of every row of X under the Gaussian described by mu and sigma.
p = multivariateGaussian(X=X, mu=mu, sigma=sigma)