Skip to content

Instantly share code, notes, and snippets.

View paceaux's full-sized avatar
🌐
Obsessing with languagey things

Paceaux paceaux

🌐
Obsessing with languagey things
View GitHub Profile
@paceaux
paceaux / cosine_similarity.py
Last active April 20, 2026 03:18
Cosine Similarity with numpy
import numpy as np
import math
def get_cosine_sim(a, b):
a = np.array(a)
b = np.array(b)
aq = np.square(np.abs(a))
bq =np.square(np.abs(b))
@paceaux
paceaux / ppmi.py
Created April 18, 2026 20:01
Positive Pointwise Mutual Information
import numpy as np
def get_ppmi(term_terms):
term_terms = np.array(term_terms)
total = term_terms.sum()
pxy = term_terms / total
px = pxy.sum(axis=1, keepdims=True)
@paceaux
paceaux / char-ngrams.py
Last active April 6, 2026 15:24
Ngrams n data
from collections import Counter
import re
def char_ngrams(word, size=1):
"""
Generates character-level n-grams for a given string with start/end markers.
Args:
word (str): The input string to process.
size (int): The length of the n-gram window. Defaults to 1.
@paceaux
paceaux / vector-functions.js
Created October 13, 2025 01:31
Vector Functions
/*
* @description Gets the Euclidean distance between two vectors
* @param {Array.<number>} v1 - a vector of numbers
* @param {Array.<number>} v2 - a vector of numbers
* @returns {number} A float
*/
function euclidian(v1, v2) {
const squares = v1.map((el, idx) => {
const subtr = el - v2[idx];
return Math.pow(subtr, 2)
@paceaux
paceaux / randomString
Last active July 25, 2025 17:49
Random String Generator
// Note: Not cryptographically secure.
function randomString(size = 5) {
const stringArray = [...Math.random().toString()].slice(2, 2 + size);
const intArray = stringArray.map(string => parseInt(string, 10));
let chars = intArray.reduce((acc, c, idx) => {
const charPos = [65,97];
let charStart = charPos[c%2];
const charOffset = Math.ceil(Math.random() * 10)%3;
charStart = (charOffset * 8) + charStart;
return String.fromCharCode(c + charStart) + acc;
@paceaux
paceaux / debug.html.twig
Last active May 27, 2025 16:24
Drupal Field Node Debugging
<dl>
<dt><code>attributes</code></dt>
<dd>{{dd(attributes)}}</dd>
<dt><code>label_hidden</code></dt>
<dd>{{dd(label_hidden)}}</dd>
<dt><code>title_attributes</code></dt>
<dd>{{dd(title_attributes)}}</dd>
<dt><code>label</code></dt>
<dd>{{dd(label)}}</dd>
@paceaux
paceaux / regexes.en.js
Last active March 4, 2025 19:41
English Language RegExes
/*
 * Matches any single character that separates words in English text:
 * dashes, sentence punctuation, brackets of all kinds, quotation marks,
 * the ellipsis and whitespace.
 *
 * The separators are wrapped in a character class so that each one matches
 * on its own; written as a bare sequence the pattern would only match that
 * exact run of characters and never split ordinary text.
 *
 * The `g` flag makes `.test()`/`.exec()` stateful (they advance `lastIndex`);
 * prefer `split`, `replace` or `matchAll` with this regex.
 */
const wordSeparatorsRegex = /[—.,;:!?‽¡¿⸘()\[\]{}<>«»…‘“”"\s]/g;
/*
 Profanity matcher (case-insensitive, global). Intended word families:

 fuck | s|er|ed|ing, motherfucker
 shit + s|ton|ing|ting, bullshit
 dick + s|head|hole|ed
 ass + es|hole|hat|face (plus trailing word characters on those compounds)
 cock +s
 cunt +s, twat +s, wtf, stfu

 The "ass" branch only accepts the suffixes listed above (or none), so
 ordinary words such as "assume" or "assessment" are not flagged.
 The number and order of capture groups is kept as before.

 Because of the `g` flag, `.test()`/`.exec()` advance `lastIndex` between
 calls; use `match`, `replace`, `search` or `matchAll` for stateless checks.
*/
const profanityRegex = /((\b)?(fuck)(\w+)?)|((\b)?shit(\w+)?)|((\b)dick(\w+|\b))|((\b)ass((?:es|(?:hole|hat|face)\w*)?\b))|((\b)cocks?\b)|((\b)cunts?\b)|((\b)twats?\b)|(wtf)|(stfu)/gi;
@paceaux
paceaux / evil.php
Created September 1, 2024 18:25
Basic Hacker management for folks looking for PHP vulnerabilities
<?php
function getUserIP() {
if( array_key_exists('HTTP_X_FORWARDED_FOR', $_SERVER) && !empty($_SERVER['HTTP_X_FORWARDED_FOR']) ) {
if (strpos($_SERVER['HTTP_X_FORWARDED_FOR'], ',')>0) {
$addr = explode(",",$_SERVER['HTTP_X_FORWARDED_FOR']);
return trim($addr[0]);
} else {
return $_SERVER['HTTP_X_FORWARDED_FOR'];
}
@paceaux
paceaux / profanity-regex.js
Last active April 30, 2025 15:55
profanity-regex
/*
 * Matches the first profane word found in a string.
 *
 * - No flags: matching is case-sensitive, and `.test()` is stateless.
 * - The "ass" branch requires both "s" characters; with an optional second
 *   "s" the pattern also flagged everyday words such as "as", "was", "has".
 * - Known limitation: because a leading `\w+` is allowed (to catch compounds
 *   like "dumbass"), words that merely end in "ass" are still matched.
 * - Capture-group layout is unchanged.
 */
const profanityRegex = /(((\b|\w+)?(fuck|shit|dick|twat|cock|douche|bitch|piss)(\w+|\b))|((\w+|\b)ass(\b|hole|face|clown))|((\b)cunt(\w+|\b)))/;
@paceaux
paceaux / levenshtein.js
Last active February 26, 2024 18:15
String comparisons
/**
* @description calculates the levenshtein distance between words
* @param {string} str a string
* @param {string} str2 another string
* @returns {object} with properties 'steps' and 'transitions'
*/
function levenshtein(str, str2) {
if (typeof str !== 'string' || typeof str2 !== 'string') return;
let [shorter, longer] = [str, str2].sort((a, b) => a.length - b.length);