Skip to content

Instantly share code, notes, and snippets.

View kyleburton's full-sized avatar

Kyle Burton kyleburton

View GitHub Profile
#!/bin/bash
SVC_HOME="/var/lib/the-clj-service/current"
# no args means we fork
if [ -z "${1:-}" ]; then
export SVC_JAR="$SVC_HOME/the-clj-servicejar"
if [ -z "$SVC_JAR" ]; then
echo "Error: SVC_JAR not found"
exit 1
#!/usr/bin/env ruby
require File.join(File.dirname(__FILE__),'..','src','main','ruby','util')
require File.join(File.dirname(__FILE__),'deploy-check')
class ClojureServiceDeployer < ServiceUtils
def run
unless File.exist? release_jar_file
unless system("mvn assembly:assembly")
(ns clj-etl-utils.sequences)
(def random-sample-seq
(let [rnd (java.util.Random.)]
(fn self [[item & population :as population-seq] population-size remaining-samples-needed]
(if (or (zero? remaining-samples-needed) (empty? population-seq))
nil
(if (< (.nextInt rnd population-size) remaining-samples-needed)
(lazy-cat
-- Return at most 20000 values of "column" from "table", with the rows
-- ordered by a per-row RANDOM() value before the LIMIT is applied.
SELECT column FROM table
ORDER BY RANDOM()
LIMIT 20000
public class Main {
public static void main ( String args [] ) {
int size = 392000000;
int sampleSize = 20000;
System.err.println("Start: allocation");
int [] array = new int[size];
System.err.println("Done: allocation");
java.util.Random rnd = new java.util.Random();
System.err.println("Start: population w/line numbers");
(defn find-dupes-naieve
  "Tallies how often each value of the second tab-separated field occurs
  in inp-seq, a sequence of strings.

  Returns a map from field value to occurrence count; every value seen is
  included, not only those occurring more than once. A line with no second
  field is counted under the nil key. An empty inp-seq yields {}."
  [inp-seq]
  ;; `frequencies` is the standard-library form of the reduce/assoc/inc tally.
  ;; The ^String hint lets the compiler resolve .split without reflection,
  ;; which matters because it is invoked once per input line.
  (frequencies
   (map (fn [^String line] (second (.split line "\t")))
        inp-seq)))
(defn find-dupes-with-bloom-filter [inp-seq expected-size fp-prob]
(let [flt (bloom/make-optimal-filter expected-size fp-prob)]
(reduce (fn [res item]
;; Sample from the lines read from "phone-nums.txt".
;; NOTE(review): 2000000 is presumably the population size (expected number of
;; lines) and 200 the number of samples wanted -- confirm against the
;; signature of sequences/random-sample-seq and the actual size of the file.
(sequences/random-sample-seq (ds/read-lines "phone-nums.txt")
2000000
200)
(defn count-area-codes
  "Builds a map from area code to number of occurrences over inp-seq.

  Each element of inp-seq is a string whose second tab-separated field is
  searched for the first parenthesised run of digits; that run of digits is
  the area code. Fields containing no such group are tallied under the nil
  key. An empty inp-seq yields {}."
  [inp-seq]
  (letfn [(area-code-of [line]
            ;; second tab-separated field, then the first "(digits)" group in it
            (let [fields (.split line "\t")
                  phone  (second fields)]
              (second (re-find #"\((\d+)\)" phone))))
          (tally [counts line]
            (let [code  (area-code-of line)
                  so-far (get counts code 0)]
              (assoc counts code (inc so-far))))]
    (reduce tally {} inp-seq)))
(apply
merge-with +
(reduce
(fn [res counts]
(merge-with + res counts))
(pmap (fn [[start end]]
(count-area-codes
(io/read-lines-from-file-segment inp-file start end)))
(partition 2 1
(io/byte-partitions-at-line-boundaries
inp-file
(* 1024 1024)))))
(defun krb-rename-file-visited-by-this-buffer (new-name)
  "Rename the file visited by the current buffer to NEW-NAME and revisit it.

If NEW-NAME contains no \"/\", it is placed in the same directory as the
current file; otherwise it is used as given.  After the file is renamed on
disk, the current buffer is killed and NEW-NAME is opened with `find-file'.

Signals an error if the current buffer is not visiting a file."
  (interactive
   (progn
     ;; Fail before prompting, instead of showing "Rename [nil] to: ".
     (unless buffer-file-name
       (error "Current buffer is not visiting a file"))
     (list (read-string (format "Rename [%s] to: " buffer-file-name)))))
  (let ((file-name buffer-file-name))
    ;; Guard non-interactive callers too: without a visited file the calls
    ;; below would fail with an opaque wrong-type-argument error.
    (unless file-name
      (error "Current buffer is not visiting a file"))
    ;; A bare file name (no directory part) stays alongside the original.
    (if (not (string-match "/" new-name))
        (setq new-name (format "%s%s" (file-name-directory file-name)
                               new-name)))
    (message "rename-file: %s to %s" file-name new-name)
    (rename-file file-name new-name)
    ;; Drop the buffer still attached to the old path, then visit the new one.
    (kill-buffer (buffer-name))
    (find-file new-name)))