MNoorFawi’s gists

MNoorFawi / lz78.py

Created May 29, 2020 19:33

Lempel-Ziv algorithm in Python for list compression.

	def glue_seq(seq, last_separate=False):
	    """Join tokens with single spaces, optionally splitting off the last one.

	    When ``last_separate`` is false, ``seq`` is handed to ``" ".join`` and the
	    joined string is returned.

	    When ``last_separate`` is true, ``seq`` must be a string: it is split on
	    whitespace and a ``(prefix, last)`` tuple is returned, where ``prefix`` is
	    every token but the last re-joined with single spaces and ``last`` is the
	    final token. A string with no tokens raises ``IndexError``.
	    """
	    if last_separate:
	        tokens = seq.split()
	        # Everything before the final token forms the prefix.
	        return " ".join(tokens[:-1]), tokens[-1]
	    return " ".join(seq)

	def lz78(data):
	"""Normal Lempel-Ziv78 which assigns codes for each new encountered sequence."""

MNoorFawi / huffman_coding.py

Last active May 29, 2020 19:46

Huffman coding in Python

	from bisect import insort
	from collections import deque

	## to store codes for huffman coding
	class CodeHeap:
	    """Container that keeps pushed items in sorted order (Huffman codes)."""

	    def __init__(self):
	        # Backing store; push() inserts so that it always stays sorted.
	        self._container = deque()

	    def push(self, item):
	        """Insert ``item`` at the position that keeps the container sorted.

	        Items must be mutually comparable, since ``insort`` locates the
	        insertion point by bisection.
	        """
	        insort(self._container, item)

MNoorFawi / binary_search.py

Created May 29, 2020 19:59

Binary search in Python that returns all occurrences of a repeated item

	def range_binary_search(lst, key):
	# search for the indices of the key in the sorted container
	# start to get last occurrence index at the right side
	right = 0
	length = len(lst)
	while right < length:
	middle = (right + length) // 2
	f = lst[middle]
	if key < f:
	length = middle

MNoorFawi / simulated_annealing.py

Created May 29, 2020 20:09

Simulated annealing algorithm in Python

	import random
	import math

	def simulated_annealing(domain, costf, temp = 10000.0,
	cool = 0.95, step = 1):
	"""Simulated annealing optimization algorithm that takes a cost function and tries to minimize it by
	looking for solutions from the given domain.
	Requirements: define costf, the cost function to be minimized (e.g. an error function),
	and domain, which is a random solution to begin with."""

MNoorFawi / wknn.py

Created May 29, 2020 20:15

Weighted K-Nearest Neighbor (KNN) algorithm in Python

	import math
	from sklearn.neighbors import KDTree

	# different weighting functions to use
	def inverseweight(dist, num=1.0, const=0.1):
	    """Return the inverse-distance weight ``num / (dist + const)``.

	    ``const`` is added to the distance before dividing, so a distance of
	    zero still gives a finite weight with the default arguments. Raises
	    ``ZeroDivisionError`` if ``dist + const`` equals zero.
	    """
	    return num / (dist + const)

	def gaussian(dist, sigma=10.0):
	    """Return the Gaussian weight ``exp(-dist**2 / (2 * sigma**2))``.

	    The weight is 1.0 at ``dist == 0`` and decays towards 0 as the distance
	    grows; ``sigma`` controls how quickly it falls off. Raises
	    ``ZeroDivisionError`` if ``sigma`` is zero.
	    """
	    # math.exp(x) is the more accurate spelling of math.e ** x.
	    return math.exp(-(dist ** 2) / (2 * sigma ** 2))

MNoorFawi / one_hot_encode.R

Created May 29, 2020 21:33

One-Hot Encoding in R

	### one-hot encoding
	vars <- colnames(data)
	## to one hot encode factor values and normalize numeric ones if needed
	cat_vars <- vars[sapply(data[, vars], class) %in%
	c("factor", "character", "logical")]
	data2 <- data[, cat_vars]
	for (i in cat_vars) {
	dict <- unique(data2[, i])
	for (key in dict) {
	data2[[paste0(i, "_", key)]] <- 1.0 * (data2[, i] == key)

MNoorFawi / single_var_model.R

Created May 29, 2020 21:41

Single Variable Model in R

	## How to create a single variable model in R
	single_variable_model <- function(x, y, pos) {
	if (class(x) %in% c("numeric", "integer")) {
	# if numeric, discretize it
	probs <- unique(quantile(x, probs = seq(0.1, 1, 0.1), na.rm = T))
	x <- cut(x, breaks = probs, include.lowest = T)
	}
	prob_table <- table(as.factor(y), x)
	vals <- unique(y)
	neg <- vals[which(vals != pos)]

MNoorFawi / trn_tst_preprocess.jl

Created May 29, 2020 22:12

Preprocessing train and test data frames in Julia (v0.6.4)

	# one hot encoding string columns and normalizing numeric ones
	# the function prepares new coming and/or test dataframe using existing/training dataframe
	function preprocess(new::DataFrame, old::DataFrame)
	dataType = describe(old)
	x = DataFrame()
	d = DataFrame()
	str = dataType[dataType[:eltype] .== String, :variable]
	num = dataType[(dataType[:eltype] .== Float64) .\| (dataType[:eltype] .== Int64), :variable]
	str = setdiff(str, [names(old)[end]])
	for i in str

MNoorFawi / recommender_system.cypher

Last active May 29, 2020 22:27

Recommender system in Neo4j using the Jaccard index

	// performing film recommendation system inside Neo4j database
	// using Jaccard Index as similarity measurement

	MATCH (c1:Customer)-[:RENTED]->(f:Film)<-[:RENTED]-(c2:Customer)
	WHERE c1 <> c2 AND c1.customerID = "13" // an example of a user index
	WITH c1, c2, COUNT(DISTINCT f) as intersection

	MATCH (c:Customer)-[:RENTED]->(f:Film)
	WHERE c in [c1, c2]
	WITH c1, c2, intersection, COUNT(DISTINCT f) as union

MNoorFawi / hamming_dist.sql

Created May 29, 2020 22:32

Hamming Distance in PostgreSQL Database

	CREATE OR REPLACE FUNCTION hamming_distance(
	A0 bigint, A1 bigint, A2 bigint, A3 bigint,
	B0 bigint, B1 bigint, B2 bigint, B3 bigint
	)
	RETURNS integer AS $$
	BEGIN
	RETURN
	bits_count(A0 # B0) +
	bits_count(A1 # B1) +
	bits_count(A2 # B2) +

Muhammad Noor Fawi MNoorFawi