This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Data source: https://storage.googleapis.com/books/ngrams/books/datasetsv2.html | |
# extraction pattern: ngram TAB year TAB match_count TAB volume_count NEWLINE | |
# out: unique_ngram TAB sum(match_count) NEWLINE | |
import os, sys | |
from pathlib import Path | |
from concurrent.futures import ProcessPoolExecutor | |
from multiprocessing import freeze_support | |
import polars as pl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// decode credentials upon receiving them from store | |
function decodeCredentials(crd){ | |
// decrypt it first | |
const dec = CryptoJS.AES.decrypt(crd, saltCredentials).toString(CryptoJS.enc.Utf8); | |
// extract the creds length and pepper step | |
const len = dec.charCodeAt(0) - 96; | |
const step = dec.charCodeAt(1) - 96; | |
let i = 0, j = 2, d = []; | |
// extract the pepper from the salt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// used to obfuscate and encrypt the credentials | |
// Passed as the second (key/passphrase) argument to CryptoJS.AES.decrypt
// in decodeCredentials.
// NOTE(review): this secret is hard-coded in the script source, so anyone who
// can read the file can read it; confirm that is acceptable for the data
// being protected.
const saltCredentials = "jf02heg9u64a{%m<83#@;Pxrjg17uyr#@&*%^Y"; | |
// encode credentials before storing | |
function encodeCredentials(crds){ | |
// json object expected e.g. {'api-id':'K0xf56g', 'pwd':'Some.Pa$$w0rd'} | |
const crd = JSON.stringify(crds); | |
const len = crd.length; | |
// this constraint is due to storing the length in one byte | |
if (len > 159) return null; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<script src="https://cdnjs.cloudflare.com/ajax/libs/crypto-js/4.0.0/crypto-js.min.js"></script> | |
<script> | |
// used to obfuscate and encrypt the credentials | |
// NOTE(review): this constant is declared inside an inline <script> element,
// so its value ships with the page source and is readable by anyone who can
// load the page; confirm that is acceptable for the data being protected.
const saltCredentials = "jf02heg9u64a{%m<83#@;Pxrjg17uyr#@&*%^Y"; | |
// encode credentials before storing | |
function encodeCredentials(crds){ | |
// json object expected e.g. {'api-id':'K0xf56g', 'pwd':'Some.Pa$$w0rd'} | |
const crd = JSON.stringify(crds); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# extraction pattern: ngram TAB year TAB match_count TAB volume_count NEWLINE | |
# out: unique_ngram TAB sum(match_count) NEWLINE | |
import re | |
import os, sys, mmap | |
from pathlib import Path | |
from tqdm import tqdm | |
from concurrent.futures import ThreadPoolExecutor | |
# Dotted-abbreviation matcher, anchored at the start of the string.
# Group 1: the whole run of one or more "ASCII uppercase letter + '.'" pairs.
# Group 2: the last such pair in the run.
# Group 3: the single character that must follow the run, either an
#          underscore or any non-word character.
abv = re.compile(r'^(([A-Z]\.){1,})(_|[^\w])') # A.B.C.