Skip to content

Instantly share code, notes, and snippets.

View aSipiere's full-sized avatar
💅
flexin'

Alex aSipiere

💅
flexin'
  • London
View GitHub Profile
@aSipiere
aSipiere / blaseball_names.json
Last active October 19, 2020 21:56
blaseball_names.json
{
"first names": {
"Oliver": 4,
"Kennedy": 4,
"Adkins": 4,
"Wyatt": 4,
"Theodore": 4,
"Nicholas": 4,
"Alston": 3,
"Donia": 3,
import pandas as pd
from pyproj import Transformer
def towgs84(row, lat_col, lon_col):
"""An apply function for a pyproj transformer from 4326/WGS84 to 3857/web mercator
:param row: a row of a pandas dataframe as supplied by df.apply().
:param lat_col: str of the lat col
:param lon_col: str of the lon col
@aSipiere
aSipiere / gpd_nearest_point.py
Created September 9, 2020 17:09
Get the nearest point with GeoPandas
import geopandas as gpd
from shapely.ops import nearest_points
def nearest(row, df1, df2, geom1_col='geometry', geom2_col='geometry', src_column=None):
"""Find the nearest point and return the corresponding value from specified column."""
# Construct a multipoint object
geom_union = df2.unary_union
# Find the geometry that is closest
nearest = df2[geom2_col] == nearest_points(row[geom1_col], geom_union)[1]
# Get the corresponding value from df2 (matching is based on the geometry)
import os
import tempfile
import uuid
import argparse
import boto3
import azure.storage.blob as blob
import tqdm
@aSipiere
aSipiere / csv_splitter.py
Created August 5, 2020 10:15
A Python 3.8 update of https://gist.github.com/jrivero/1085501, with argparse and tqdm.
import os
import csv
import argparse
from tqdm import tqdm
def split(filehandler, delimiter=',', row_limit=10000,
output_name_template='output_%s.csv', output_path='.', keep_headers=True):
"""
Splits a CSV file into multiple pieces.
import foo
import argparse
def main(quux, quuz, **kwargs):
try:
if quux is True:
foo.bar()
else:
foo.baz()
except Exception as ex:
@aSipiere
aSipiere / s3_bulk_rename.py
Created October 28, 2019 10:28
Bulk renaming of file extensions in S3
import boto3
import botocore
from joblib import Parallel, delayed
import os
# Placeholder bucket name; set this to the bucket you want to operate on.
bucket_name = "bucket_name"
s3 = boto3.resource('s3')
# Pass the variable defined above. The previous spelling, `bucketname`,
# was never defined and raised NameError as soon as the script ran.
bucket = s3.Bucket(bucket_name)
exists = True
try:
@aSipiere
aSipiere / r_dd.py
Created April 9, 2019 11:11
Recursive Default Dictionary
from collections import defaultdict
def r_dd():
    """Return a recursive ``defaultdict``.

    Every missing key yields another dictionary built by ``r_dd``, so
    arbitrarily deep paths can be assigned without creating the
    intermediate levels first, e.g. ``d = r_dd(); d["a"]["b"]["c"] = 1``.

    Note that, as with any ``defaultdict``, merely looking up a missing
    key inserts it.
    """
    return defaultdict(r_dd)
reports = {}
for name, clf in zip(classifier_names, classifiers):
clf.fit(X_train, Y_train)
Y_predictions = clf.predict(X_test)
Y_true = Y_test
reports[name] = classification_report(Y_true, Y_predictions, output_dict=True)
reporting_df = pd.DataFrame.from_dict({(i,j): reports[i][j]
for i in reports.keys()
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis