Skip to content

Instantly share code, notes, and snippets.

View straussmaximilian's full-sized avatar

Maximilian Strauss straussmaximilian

  • University of Copenhagen | Max Planck Institute
  • Copenhagen
View GitHub Profile
import pandas as pd
import re
from collections import Counter
import spacy
from tqdm import tqdm as tqdm
from urllib.parse import urlparse
import matplotlib.pyplot as plt
# if en_core_web_sm is not installed
# !python -m spacy download en_core_web_sm
import streamlit as st
from PIL import Image
import os
import random
state = st.session_state
BASE_PATH = "./DICE_IMAGES_100/"
OPTIONS = ["1", "2", "3", "4", "5", "6", "NA"]
def fixed_query(size):
    """
    Build the input pair used by the query benchmarks.

    The first element is a random array created with the requested `size`;
    the second is a batch of random query points whose size argument is
    always 1e4, independent of `size`. Both come from `random_array`, which
    is defined elsewhere in the file.

    Returns a tuple `(array, query_points)`.
    """
    # Tuple elements are evaluated left to right, so the data array is
    # generated before the query points.
    return random_array(size), random_array(1e4)
from scipy.spatial import cKDTree
def kdtree(data, delta=0.1):
"""
Constructs a 2D k-d-tree from the input array and queries the points within a square around a given point.
"""
array, query_points = data
tree = cKDTree(array)
count = 0
for point in query_points:
@njit
def boolean_index_numba_multiple(array, xmin, xmax, ymin, ymax, zmin, zmax):
"""
Takes a numpy array and isolates all points that are within [xmin, xmax]
for the first dimension, between [ymin, ymax] for the second dimension
and [zmin, zmax] for the third dimension by creating a boolean index.
This function will be compiled with numba.
"""
index = ((array[:, 0] > xmin) & (array[:, 1] > ymin) & (array[:, 2] > zmin)
& (array[:, 0] < xmax) & (array[:, 1] < ymax) & (array[:, 2] < zmax))
import perfplot
plt.figure(figsize=(10, 10))
plt.title('Quantitative Comparison of Filtering Speeds')
perfplot.show(
setup=random_array,
kernels=[loop, boolean_index, loop_numba, boolean_index_numba],
n_range=[2**k for k in range(2, 22)],
logx=True,
logy=True,
#Pandas
import pandas as pd
df = pd.DataFrame({'x': array[:, 0], 'y': array[:, 1], 'z': array[:, 2]})
# Pandas query
print('Pandas Query:\t\t', end='')
%timeit df.query('x >= 0.2 and x <= 0.4 and y >= 0.4 and y <= 0.6')
# Pandas eval
from numba.typed import List
from numba import njit
@njit
def boolean_index_numba(array):
"""
Takes a numpy array and isolates all points that are within [0.2,0.4] for
the first dimension and between [0.4,0.6] for the second dimension
by creating a boolean index.
This function will be compiled with numba.
def boolean_index(array):
    """
    Select the rows of a 2D numpy array that fall inside a fixed window.

    A row is kept when its first column lies in [0.2, 0.4] and its second
    column lies in [0.4, 0.6] (bounds inclusive). The selection is done with
    a boolean mask, so the result is a new array containing only the
    matching rows, in their original order.
    """
    # One mask per dimension, combined element-wise.
    x_in_range = (array[:, 0] >= 0.2) & (array[:, 0] <= 0.4)
    y_in_range = (array[:, 1] >= 0.4) & (array[:, 1] <= 0.6)
    return array[x_in_range & y_in_range]
# List comprehension
def list_comprehension(tuple_list):
    """
    Filter a list of point tuples with a list comprehension.

    A point is kept when its first coordinate lies in [0.2, 0.4] and its
    second coordinate lies in [0.4, 0.6] (bounds inclusive). Matching points
    are returned in a new list, in their original order.
    """
    return [
        point
        for point in tuple_list
        if point[0] >= 0.2
        and point[1] >= 0.4
        and point[0] <= 0.4
        and point[1] <= 0.6
    ]