This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# extraction pattern: ngram TAB year TAB match_count TAB volume_count NEWLINE
# out: unique_ngram TAB sum(match_count) NEWLINE
import re | |
import os, sys, mmap | |
from pathlib import Path | |
from tqdm import tqdm | |
from concurrent.futures import ThreadPoolExecutor | |
# Abbreviation matcher, anchored at the start of the string: one or more
# "ASCII uppercase letter + literal dot" pairs, followed by either an
# underscore or a non-word character. The underscore is listed explicitly
# because `_` counts as a word character and so is excluded by [^\w].
# Group 1 captures the whole dotted run; group 3 captures the terminator.
abv = re.compile(r'^(([A-Z]\.){1,})(_|[^\w])') # e.g. A.B.C.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import cv2 | |
# cv2.getGaborKernel(ksize, sigma, theta, lambda, gamma, psi, ktype)
# ksize - size of the Gabor filter (n, n)
# sigma - standard deviation of the Gaussian function
# theta - orientation of the normal to the parallel stripes
# lambda - wavelength of the sinusoidal factor
# gamma - spatial aspect ratio
# psi - phase offset
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
import multiprocessing | |
from textwrap import dedent | |
from itertools import izip_longest | |
def process_chunk(d): | |
"""Replace this with your own function | |
that processes data one line at a |