Alex aSipiere

💅

flexin'

Data Scientist

aSipiere / blaseball_names.json

Last active October 19, 2020 21:56

blaseball_names.json

aSipiere / wgs84_to_webmercator.py

Created September 17, 2020 11:35

	import pandas as pd
	from pyproj import Transformer


	def towgs84(row, lat_col, lon_col):
	"""An apply function for a pyproj transformer from 4326/WGS84 to 3857/web mercator

	:param row: a row of a pandas dataframe as supplied by df.apply().
	:param lat_col: str of the lat col
	:param lon_col: str of the lon col

aSipiere / gpd_nearest_point.py

Created September 9, 2020 17:09

Get the nearest point with GeoPandas

	import geopandas as gpd
	from shapely.ops import nearest_points

	def nearest(row, df1, df2, geom1_col='geometry', geom2_col='geometry', src_column=None):
	"""Find the nearest point and return the corresponding value from specified column."""
	# Construct a multipoint object
	geom_union = df2.unary_union
	# Find the geometry that is closest
	nearest = df2[geom2_col] == nearest_points(row[geom1_col], geom_union)[1]
	# Get the corresponding value from df2 (matching is based on the geometry)

aSipiere / blob2s3.py

Created August 6, 2020 12:55

	import os
	import tempfile
	import uuid
	import argparse

	import boto3
	import azure.storage.blob as blob
	import tqdm

aSipiere / csv_splitter.py

Created August 5, 2020 10:15

A Python 3.8 update of https://gist.github.com/jrivero/1085501, with argparse and tqdm added.

	import os
	import csv
	import argparse
	from tqdm import tqdm

	def split(filehandler, delimiter=',', row_limit=10000,
	output_name_template='output_%s.csv', output_path='.', keep_headers=True):
	"""
	Splits a CSV file into multiple pieces.

aSipiere / runner.py

Created March 25, 2020 16:22

aSipiere / s3_bulk_rename.py

Created October 28, 2019 10:28

Bulk renaming of file extensions in S3

	import boto3
	import botocore
	from joblib import Parallel, delayed
	import os

	# Name of the S3 bucket to operate on (placeholder value; replace before running).
	bucket_name = "bucket_name"
	s3 = boto3.resource('s3')
	# Pass the variable defined above; the previous spelling `bucketname` was
	# never defined and raised NameError before any S3 call was made.
	bucket = s3.Bucket(bucket_name)
	# Initial value; presumably cleared by the existence check that follows — confirm.
	exists = True
	try:

aSipiere / r_dd.py

Created April 9, 2019 11:11

Recursive Default Dictionary

	from collections import defaultdict

	def r_dd():
	    """Return a defaultdict whose missing keys are themselves recursive defaultdicts.

	    Nested keys of any depth can be assigned without creating the
	    intermediate levels first, e.g. ``d = r_dd(); d["a"]["b"] = 1``.

	    Defined with ``def`` rather than a lambda bound to a name so the
	    factory has a real ``__name__`` (clearer tracebacks, and the factory
	    can be located by name when pickling).
	    """
	    return defaultdict(r_dd)

aSipiere / classification_report_to_multindex_df.py

Created January 24, 2019 12:56

	# Classification reports, keyed by classifier name.
	reports = {}

	# NOTE(review): the loop body below has no indentation in this copy —
	# presumably lost in extraction; confirm against the original gist.
	for name, clf in zip(classifier_names, classifiers):
	# Fit on the training data, then predict on the test data.
	clf.fit(X_train, Y_train)
	Y_predictions = clf.predict(X_test)
	Y_true = Y_test
	# Presumably sklearn.metrics.classification_report (import not visible here);
	# if so, output_dict=True returns the report as a dict instead of a string.
	reports[name] = classification_report(Y_true, Y_predictions, output_dict=True)

	reporting_df = pd.DataFrame.from_dict({(i,j): reports[i][j]
	for i in reports.keys()

aSipiere / classifier_comparison.py

Created January 15, 2019 14:30

	from sklearn.neural_network import MLPClassifier
	from sklearn.neighbors import KNeighborsClassifier
	from sklearn.svm import SVC
	from sklearn.gaussian_process import GaussianProcessClassifier
	from sklearn.gaussian_process.kernels import RBF
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
	from sklearn.naive_bayes import GaussianNB
	from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis