Skip to content

Instantly share code, notes, and snippets.

@bkj
bkj / latlon2cartesian-new.py
Last active June 14, 2018 00:23
latlon2cartesian-new.py
#!/usr/bin/env python
"""
latlon2cartesian-new.py
"""
EARTH_RADIUS = 6371
def haversine_distance(point1, point2, radius=EARTH_RADIUS):
assert point1.shape[1] == 2
assert point2.shape[1] == 2
@bkj
bkj / latlon2cartesian-new.py
Created June 14, 2018 00:20
latlon2cartesian-new.py
EARTH_RADIUS = 6371
def haversine_distance(point1, point2, radius=EARTH_RADIUS):
assert point1.shape[1] == 2
assert point2.shape[1] == 2
latitude1, longitude1 = np.radians(point1).T
latitude2, longitude2 = np.radians(point2).T
dlongitude = longitude2 - longitude1
dlatitude = latitude2 - latitude1
@bkj
bkj / prep-movielens.sh
Created June 13, 2018 13:26
prep-movielens.sh
#!/bin/bash
# Download and unpack the MovieLens archives, then trim the 100k ratings
# file down to its first three columns.

# Fetch the 100k, 1m and 20m archives from the GroupLens file server.
wget http://files.grouplens.org/datasets/movielens/ml-100k.zip
wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
wget http://files.grouplens.org/datasets/movielens/ml-20m.zip
# Unzip every entry in the current directory whose name contains "zip".
# NOTE(review): the grep is unanchored and the output of `ls` is parsed, so
# any unrelated file with "zip" in its name is also handed to unzip.
ls | grep zip | xargs -I {} unzip {}
# Pull the 100k ratings file out of the extracted directory.
mv ml-100k/u.data ./ml-100k-ratings
# Keep only tab-separated fields 1-3, rewriting the file through a temporary
# file named `tmp` (presumably user id, item id, rating -- confirm against the
# dataset README).
cat ./ml-100k-ratings | cut -d$'\t' -f1,2,3 > tmp && mv tmp ./ml-100k-ratings
import torch
import numpy as np
from torch.nn import function as F
def sparse_bce_with_logits(x_, rows, cols):
""" faster than `F.binary_cross_entropy_with_logits`, given sufficient sparsity """
sx_ = F.sigmoid(x_)
sx_sel_ = sx_[rows, cols]
pos = sx_sel_.log().sum() - (1 - sx_sel_).log().sum()
all_neg = (1 - sx_).log().sum()
from time import time
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
def do_time(loader, epochs=3):
t = time()
for epoch in range(epochs):
for _ in loader:
pass
import h5py
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
class H5Dataset(Dataset):
def __init__(self, h5_path):
self.h5_path = h5_path
self.h5_file = h5py.File(h5_path, 'r')
# --
# Helpers
def do_time(trainloader):
t = time()
for epoch in range(3):
for _ in trainloader:
pass
@bkj
bkj / bug.py
Last active January 20, 2018 20:21
#!/usr/bin/env python
from __future__ import print_function
import sys
import numpy as np
from time import time
if __name__ == "__main__":
np.random.seed(123)
@bkj
bkj / kdtree.py
Last active January 12, 2018 03:17
#!/usr/bin/env python
"""
kdtree.py
"""
import numpy as np
import pandas as pd
from copy import copy
from itertools import chain
# pyspark -- partition by key
def partition_by_key(x):
    """Repartition a key/value RDD so each distinct key gets its own partition.

    The distinct keys are collected to the driver and numbered 0..n-1 in the
    order they are returned; that number is used as the partition index.

    Args:
        x: A pair RDD, i.e. an object exposing ``keys()`` and
            ``partitionBy(numPartitions, partitionFunc=...)``. Keys must be
            hashable because they are used as dictionary keys.

    Returns:
        Whatever ``x.partitionBy`` returns, with one partition per distinct
        key.
    """
    distinct_keys = x.keys().distinct().collect()
    # The original wrote `dict(zip(key_lookup), range(...))`, which passes two
    # positional arguments to `dict` and raises TypeError. Build the
    # key -> partition-index map directly instead.
    key_lookup = {key: index for index, key in enumerate(distinct_keys)}
    return x.partitionBy(len(key_lookup), partitionFunc=lambda k: key_lookup[k])