Maximilian Strauss straussmaximilian

Bioinformatics | Biophysics | Data Science | Mechanical Engineer - Medical Technology

straussmaximilian / 07_eda.py

Created November 22, 2022 21:44

	import pandas as pd
	import re
	from collections import Counter
	import spacy
	from tqdm import tqdm as tqdm
	from urllib.parse import urlparse
	import matplotlib.pyplot as plt

	# if en_core_web_sm is not installed
	# !python -m spacy download en_core_web_sm

straussmaximilian / 06_annotation.py

Created November 13, 2022 12:57

straussmaximilian / 04_perfplot2.py

Created September 21, 2019 13:07

	def fixed_query(size):
	    """
	    Return a random array of the given size and a fixed number of
	    random query points.

	    Both arrays are produced by ``random_array``, which is defined
	    elsewhere. The number of query points does not depend on ``size``.

	    Returns a tuple ``(array, query_points)``.
	    """
	    array = random_array(size)
	    # The query set is always requested with the same size (1e4), so only
	    # the size of the searched array varies between calls.
	    query_points = random_array(1e4)
	    return (array, query_points)

straussmaximilian / 04_kdtree.py

Created September 21, 2019 13:05

	from scipy.spatial import cKDTree

	def kdtree(data, delta=0.1):
	"""
	Constructs a 2D k-d-tree from the input array and queries the points within a square around a given point.
	"""
	array, query_points = data
	tree = cKDTree(array)
	count = 0
	for point in query_points:

straussmaximilian / 04_multiple_01.py

Created September 21, 2019 13:03

	@njit
	def boolean_index_numba_multiple(array, xmin, xmax, ymin, ymax, zmin, zmax):
	"""
	Takes a numpy array and isolates all points that are within [xmin, xmax]
	for the first dimension, between [ymin, ymax] for the second dimension
	and [zmin, zmax] for the third dimension by creating a boolean index.
	This function will be compiled with numba.
	"""
	index = ((array[:, 0] > xmin) & (array[:, 1] > ymin) & (array[:, 2] > zmin)
	& (array[:, 0] < xmax) & (array[:, 1] < ymax) & (array[:, 2] < zmax))

straussmaximilian / 04_perfplot_1.py

Created September 21, 2019 13:01

	import perfplot

	plt.figure(figsize=(10, 10))
	plt.title('Quantitative Comparison of Filtering Speeds')
	perfplot.show(
	setup=random_array,
	kernels=[loop, boolean_index, loop_numba, boolean_index_numba],
	n_range=[2**k for k in range(2, 22)],
	logx=True,
	logy=True,

straussmaximilian / 04_pandas.py

Created September 21, 2019 12:59

	# Pandas

	import pandas as pd
	# Build a DataFrame whose x, y and z columns are the first three columns of `array`.
	df = pd.DataFrame({'x': array[:, 0], 'y': array[:, 1], 'z': array[:, 2]})

	# Pandas query: time selecting the rows with x in [0.2, 0.4] and y in [0.4, 0.6].
	# end='' suppresses the newline so the timing output follows the label.
	print('Pandas Query:\t\t', end='')
	# NOTE: %timeit is an IPython magic; this line only runs in IPython/Jupyter.
	%timeit df.query('x >= 0.2 and x <= 0.4 and y >= 0.4 and y <= 0.6')

	# Pandas eval

straussmaximilian / 04_numba_functions.py

Created September 21, 2019 12:57

	from numba.typed import List
	from numba import njit

	@njit
	def boolean_index_numba(array):
	"""
	Takes a numpy array and isolates all points that are within [0.2,0.4] for
	the first dimension and between [0.4,0.6] for the second dimension
	by creating a boolean index.
	This function will be compiled with numba.

straussmaximilian / 04_boolean_index.py

Created September 21, 2019 12:56

	def boolean_index(array):
	    """
	    Return the rows of a numpy array whose first column lies within
	    [0.2, 0.4] and whose second column lies within [0.4, 0.6].

	    The selection is done with a boolean index; both interval bounds are
	    inclusive. Any further columns are carried along unfiltered.
	    """
	    first_in_range = (array[:, 0] >= 0.2) & (array[:, 0] <= 0.4)
	    second_in_range = (array[:, 1] >= 0.4) & (array[:, 1] <= 0.6)
	    index = first_in_range & second_in_range

	    return array[index]

straussmaximilian / 04_python_functions.py

Created September 21, 2019 12:55

	# List comprehension

	def list_comprehension(tuple_list):
	    """
	    Return the points of a list of tuples whose first coordinate lies
	    within [0.2, 0.4] and whose second coordinate lies within [0.4, 0.6].

	    Filtering is done with a list comprehension; both interval bounds are
	    inclusive. Points are indexed rather than unpacked, so tuples with
	    more than two coordinates are accepted and returned unchanged.
	    """
	    filtered_list = [
	        point
	        for point in tuple_list
	        if 0.2 <= point[0] <= 0.4 and 0.4 <= point[1] <= 0.6
	    ]
	    return filtered_list