Dmitry Nikolayev macleginn

macleginn / simulation_step.py

Last active February 11, 2022 12:01

A step in the simulation of random feature spread on a network guided by NPM

	import numpy as np

	# We're given an n by n distance matrix D with transfer
	# probabilities for a given pair of nodes (for any feature),
	# a feature matrix M, and a dropout probability p_d.

	# We convert the transfer probabilities to no-transfer probabilities
	# and take their logs. log1p(-D) computes log(1 - D) without first
	# rounding 1 - D, so very small transfer probabilities are not
	# flattened to log(1) == 0.
	L = np.log1p(-D)

macleginn / get_n_grams.py

Created June 26, 2021 08:32

	# Collect every Unicode punctuation character: scan all code points
	# and keep those whose general category starts with "P".
	import sys
	from unicodedata import category
	chrs = map(chr, range(sys.maxunicode + 1))
	punctuation = {c for c in chrs if category(c)[0] == "P"}
	# A hyphen can occur inside words, so it is not treated as punctuation
	punctuation.remove('-')


	def tokenize(s, lower_case=False):

macleginn / get_roberta_word_embeddings.py

Created June 21, 2021 07:17

Code for extracting word embeddings from RoBERTa

	def rm_whitespace(s):
	    '''
	    Remove a single leading 'Ġ' marker from the string `s`.

	    Returns `s` without its first character when it starts with 'Ġ';
	    any other string (including the empty string) is returned unchanged.
	    Only one marker is removed, so 'ĠĠa' becomes 'Ġa'.
	    '''
	    if s.startswith('Ġ'):
	        return s[1:]
	    return s


	def get_tokens_with_ranges(input_string, tokenizer):
	'''
	RoBERTa prepends 'Ġ' to the beginning of what it

macleginn / plot_spectra.py

Created October 6, 2020 13:45

	import pandas as pd
	import matplotlib.pyplot as plt

	d = pd.read_excel('spectrograms-relative-20.xlsx', header=None)
	# Build row labels of the form "<column 0>-<column 1>" and install
	# them as the new index
	index_col = [f'{a}-{b}' for a, b in zip(d[0], d[1])]
	d.index = index_col
	# The two source columns are now redundant: drop them in place
	d.drop(columns=[0, 1], inplace=True)

macleginn / first_comma_collocates.py

Created October 2, 2020 09:23

A script for extracting first-comma collocates from the Bible corpus.

	import re
	import os
	from math import log
	from collections import Counter

	import pandas as pd


	def logL(p, k, n):
	    '''
	    Binomial log-likelihood kernel: k * log(p) + (n - k) * log(1 - p).

	    p -- probability of a single success
	    k -- number of successes
	    n -- number of trials

	    A term whose count is zero contributes 0 (the usual 0 * log(0) = 0
	    convention), so p == 0 with k == 0 and p == 1 with k == n no longer
	    raise. A nonzero count paired with a zero probability still raises
	    ValueError from math.log.
	    '''
	    success_term = k * log(p) if k else 0.0
	    failure_term = (n - k) * log(1 - p) if n - k else 0.0
	    return success_term + failure_term

macleginn / ilp.py

Last active April 6, 2020 19:02

	from itertools import combinations, permutations
	from collections import Counter

	import gurobipy as gb
	from gurobipy import GRB


	def get_pairwise_ordering(all_deprels: set, training_set_constraints: Counter):
	'''
	Solves an integer program and returns a non-loopy ordering

macleginn / pages_from_pdf.sh

Created March 14, 2020 12:33

A bash/zsh function to easily extract pages from .pdf files using qpdf

	# Extract a selection of pages from a PDF into a new file using qpdf.
	# params:
	# $1 input-file path,
	# $2 page range (e.g., "1-1", "10-39", "5,9-12"),
	# $3 output-file path
	# ex.: pages_from_pdf input.pdf "1,3,8-9" test.pdf
	# qpdf should be installed
	function pages_from_pdf() {
	    # Quote every argument: bash word-splits and globs unquoted
	    # expansions, which breaks paths that contain spaces.
	    qpdf "$1" --pages "$1" "$2" -- "$3"
	}

macleginn / stackedbarplot.py

Created November 21, 2019 12:53

	import pandas as pd
	import matplotlib.pyplot as plt
	import numpy as np

	# test.csv:
	# ,b,c,d
	# p,1,2,3
	# q,4,5,6
	# r,7,8,9

macleginn / savefig.py

Created November 21, 2019 09:03

	import numpy as np
	import matplotlib.pyplot as plt

	# Sample data: one mean and one standard deviation per group,
	# for each of the two series.
	menMeans, menStd = (20, 35, 30, 35, 27), (2, 3, 4, 1, 2)
	womenMeans, womenStd = (25, 32, 34, 20, 25), (3, 5, 2, 3, 3)
	N = 5  # number of groups
	ind = np.arange(N)  # the x locations for the groups
	width = 0.35  # the width of the bars: can also be len(x) sequence

macleginn / new-pos-rel-stats.py

Created October 31, 2019 10:43

	confusion_dict_pos = dict()
	confusion_dict_paths = dict()

	# NEW STUFF #
	addition_stats_pos = Counter()
	addition_stats_rel = Counter()
	# NEW STUFF #


	def strip_direction(x):
	    '''Return the part of `x` that precedes its first underscore.'''
	    return x.partition('_')[0]