import asyncio
import time

import cupy as cp


async def predict(N, power):
    # Run each prediction on its own non-blocking stream so concurrent calls can overlap on the GPU.
    compute_stream = cp.cuda.stream.Stream(non_blocking=True)
    compute_stream.use()
    d_mat = cp.random.randn(N * N, dtype=cp.float64).reshape(N, N)
    d_ret = d_mat
    # Assumed completion of the truncated snippet: repeated matmul raises d_mat to `power`.
    for _ in range(power - 1):
        d_ret = d_ret @ d_mat
    return d_ret
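A minimal driver for the snippet above, assuming a CUDA-capable GPU; the sizes and the `main` wrapper are illustrative, not from the source. Because the kernel launches are asynchronous, work queued on the two non-blocking streams can overlap on the device even though Python schedules the coroutines one after the other.

async def main():
    t0 = time.time()
    a, b = await asyncio.gather(predict(1024, 4), predict(1024, 4))
    cp.cuda.Device().synchronize()  # wait for both streams before reading the clock
    print(f"shapes {a.shape} and {b.shape} in {time.time() - t0:.3f}s")

asyncio.run(main())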
from prefixspan import PrefixSpan

from data_sources.data_generator import ExamplesGenerator, get_multiple_patterns

# Generation and mining parameters.
VOCAB_SIZE = 1000    # token vocabulary size
SEQ_LEN = 250        # tokens per generated sequence
NUM_EXAMPLES = 200   # number of sequences to generate
MIN_FREQ = 25        # minimum support for a mined pattern

multiple_patterns = get_multiple_patterns(10)
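The `ExamplesGenerator` API is not shown in the source, so the sketch below substitutes a toy database of token sequences; the PrefixSpan calls themselves (`frequent`, `topk`) are the library's real entry points.

from prefixspan import PrefixSpan

# Toy database standing in for sequences from ExamplesGenerator.
db = [
    [0, 1, 2, 3, 4],
    [1, 1, 1, 3, 4],
    [2, 1, 2, 2, 0],
    [1, 1, 1, 2, 2],
]
ps = PrefixSpan(db)
print(ps.frequent(2))   # patterns occurring in at least 2 sequences
print(ps.topk(5))       # the 5 most frequent patterns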
def get_learned_scores(**kwargs):
    """
    Scores each sentence, then multiplies each sentence by its score before the next sequence layer.

    :Keyword Arguments:
        * sent_len (int) -- sentence length in tokens
        * embedding_size (int) -- word-embedding dimension
        * seq_len (int) -- length of the overall sequence, equal to the number of sentences times the number of words per sentence
        * pre_embedded (bool) -- True if the input is already word-embedding vectors, False if it is tokens still to be embedded
        * concat_outputs (bool) -- True for a model with two similar outputs (a two-level sequence model), False for a single-output attention model (a weighted average of sentences)
    """
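To make the docstring concrete, here is a minimal NumPy sketch (not the actual model) of the single-output mode: each sentence vector is scaled by a learned score, and the scaled vectors are averaged.

import numpy as np

sent_embeds = np.random.randn(10, 300)    # 10 sentence vectors, 300-d
scores = np.random.rand(10)               # stand-in for learned per-sentence scores
weighted = sent_embeds * scores[:, None]  # scale each sentence by its score
summary = weighted.mean(axis=0)           # weighted average of sentences
print(summary.shape)                      # (300,)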
from pathlib import Path

OUT_FOLDER = "out"  # assumed; defined elsewhere in the original source


def write_to_html(sentences, highlight_vals, filename, low_val=(255, 255, 255), high_val=(77, 145, 255),
                  out_dir=OUT_FOLDER):
    # Scale scores so the largest maps to 1.
    scaled_hl = [e / max(highlight_vals) for e in highlight_vals]
    with open(Path(out_dir) / filename, 'w') as f:
        for sent, score in zip(sentences, scaled_hl):
            # Linear interpolation between the low and high RGB colors.
            color_vals = [int(low * (1 - score) + high * score) for low, high in zip(low_val, high_val)]
            f.write(f"<span style=\"background-color: rgb({color_vals[0]},{color_vals[1]},{color_vals[2]})\">"
                    f"{sent}</span>\n")
def get_learned_scores(**kwargs):
    """
    Scores each sentence, then multiplies each sentence by its score before the next sequence layer.
    """
    sent_len = kwargs.get('sent_len')
    embed_size = kwargs.get('embedding_size')
    seq_len = kwargs.get('seq_len')
    pre_embedded = kwargs.get('pre_embedded', False)
    assert seq_len % sent_len == 0, "sequence length must be a multiple of sentence length"
    # Number of sentences per observation.
    sent_per_obs = seq_len // sent_len
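A hypothetical call consistent with the constants above: 250-token sequences split into sentences of 25 tokens (so 10 sentences per observation); the sentence length and embedding size here are assumptions, not values from the source.

model = get_learned_scores(sent_len=25, embedding_size=300, seq_len=250, pre_embedded=False)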