This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import re | |
from collections import Counter | |
import spacy | |
from tqdm import tqdm as tqdm | |
from urllib.parse import urlparse | |
import matplotlib.pyplot as plt | |
# if en_core_web_sm is not installed | |
# !python -m spacy download en_core_web_sm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import streamlit as st | |
from PIL import Image | |
import os | |
import random | |
# Short alias for Streamlit's session-state object.
state = st.session_state

# Base path string for the image files (presumably the dice images to be
# labelled -- confirm against the code that reads it).
BASE_PATH = "./DICE_IMAGES_100/"

# Selectable answers: the strings "1" through "6" plus "NA".
OPTIONS = ["1", "2", "3", "4", "5", "6", "NA"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fixed_query(size, n_queries=10_000):
    """
    Returns a random array of given size and
    a fixed number of random query points.

    Parameters
    ----------
    size
        Forwarded unchanged to ``random_array`` to build the data array.
    n_queries : int, optional
        Number of query points to generate; defaults to 10_000, the count
        that used to be hard-coded.

    Returns
    -------
    tuple
        ``(array, query_points)``, both produced by ``random_array``.
    """
    array = random_array(size)
    # Pass an integer count: the float literal 1e4 used previously is not a
    # valid array dimension unless random_array converts it itself.
    query_points = random_array(n_queries)
    return array, query_points
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.spatial import cKDTree | |
def kdtree(data, delta=0.1): | |
""" | |
Constructs a 2D k-d-tree from the input array and queries the points within a square around a given point. | |
""" | |
array, query_points = data | |
tree = cKDTree(array) | |
count = 0 | |
for point in query_points: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@njit | |
def boolean_index_numba_multiple(array, xmin, xmax, ymin, ymax, zmin, zmax): | |
""" | |
Takes a numpy array and isolates all points that are within [xmin, xmax] | |
for the first dimension, between [ymin, ymax] for the second dimension | |
and [zmin, zmax] for the third dimension by creating a boolean index. | |
This function will be compiled with numba. | |
""" | |
index = ((array[:, 0] > xmin) & (array[:, 1] > ymin) & (array[:, 2] > zmin) | |
& (array[:, 0] < xmax) & (array[:, 1] < ymax) & (array[:, 2] < zmax)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import perfplot | |
plt.figure(figsize=(10, 10)) | |
plt.title('Quantitative Comparison of Filtering Speeds') | |
perfplot.show( | |
setup=random_array, | |
kernels=[loop, boolean_index, loop_numba, boolean_index_numba], | |
n_range=[2**k for k in range(2, 22)], | |
logx=True, | |
logy=True, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Pandas | |
import pandas as pd | |
df = pd.DataFrame({'x': array[:, 0], 'y': array[:, 1], 'z': array[:, 2]}) | |
# Pandas query | |
print('Pandas Query:\t\t', end='') | |
%timeit df.query('x >= 0.2 and x <= 0.4 and y >= 0.4 and y <= 0.6') | |
# Pandas eval |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from numba.typed import List | |
from numba import njit | |
@njit | |
def boolean_index_numba(array): | |
""" | |
Takes a numpy array and isolates all points that are within [0.2,0.4] for | |
the first dimension and between [0.4,0.6] for the second dimension | |
by creating a boolean index. | |
This function will be compiled with numba. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def boolean_index(array):
    """
    Takes a numpy array and isolates all points that are within [0.2,0.4] for
    the first dimension and between [0.4,0.6] for
    the second dimension by creating a boolean index.

    Both intervals are closed (bounds included). ``array`` is indexed as
    ``array[:, 0]`` and ``array[:, 1]``, so it must be two-dimensional with
    at least two columns; any further columns are carried along untouched.

    Returns the rows of ``array`` that satisfy both conditions.
    """
    first = array[:, 0]
    second = array[:, 1]
    # One mask per axis, combined element-wise into the final row selector.
    in_first_range = (first >= 0.2) & (first <= 0.4)
    in_second_range = (second >= 0.4) & (second <= 0.6)
    return array[in_first_range & in_second_range]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# List comprehension | |
def list_comprehension(tuple_list):
    """
    Takes a list of tuples and isolates all points that are within [0.2,0.4]
    for the first dimension and between [0.4,0.6] for the second dimension
    using a list comprehension.

    Both intervals are closed (bounds included). Only the first two entries
    of each tuple are inspected; matching tuples are returned unchanged and
    in their original order.

    Returns a new list containing the matching tuples.
    """
    # `point` instead of `_`: the loop variable is actually used, so the
    # conventional "unused" name would be misleading.
    return [
        point
        for point in tuple_list
        if 0.2 <= point[0] <= 0.4 and 0.4 <= point[1] <= 0.6
    ]
NewerOlder