Skip to content

Instantly share code, notes, and snippets.

@actsasflinn
actsasflinn / kyoto_cabinet_099_snow_leopard_compatibility.patch
Created April 2, 2010 03:16
Kyoto Cabinet patch to fix compatibility on Mac OS X Snow Leopard (Kyoto Cabinet 0.9.9)
--- kcthread.cc.orig 2010-04-01 22:59:17.000000000 -0400
+++ kcthread.cc 2010-04-01 23:04:41.000000000 -0400
@@ -178,12 +178,12 @@ Mutex::Mutex(Type type) {
break;
}
case ERRORCHECK: {
- if (::pthread_mutexattr_settype(&attr, ::PTHREAD_MUTEX_ERRORCHECK) != 0)
+ if (::pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK) != 0)
throw std::runtime_error("pthread_mutexattr_settype");
break;
@actsasflinn
actsasflinn / WSAppender.java
Created June 2, 2011 05:13
Web Service Log4j Appender, loosely based on SMTPAppender
package client;
import org.apache.log4j.AppenderSkeleton;
import org.apache.log4j.Layout;
import org.apache.log4j.Level;
import org.apache.log4j.helpers.CyclicBuffer;
import org.apache.log4j.helpers.LogLog;
import org.apache.log4j.helpers.OptionConverter;
import org.apache.log4j.spi.ErrorCode;
import org.apache.log4j.spi.LoggingEvent;
function doHash(str, seed) {
var m = 0x5bd1e995;
var r = 24;
var h = seed ^ str.length;
var length = str.length;
var currentIndex = 0;
while (length >= 4) {
var k = UInt32(str, currentIndex);
@actsasflinn
actsasflinn / Vagrantfile
Created October 5, 2015 00:32
Hortonworks HDP Ambari Cluster Using Vagrant on Ubuntu
# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.vm.box = "hdp_vm"
config.vm.box_url = "http://opscode-vm-bento.s3.amazonaws.com/vagrant/virtualbox/opscode_centos-6.7_chef-provisionerless.box"
$script = <<SCRIPT
sudo yum -y install ntp
sudo yum -y install wget
-- Materialize flinn.cars into cars_rowid with an extra leading `rowid` column.
-- rowid is the string form of java.util.UUID.randomUUID() (invoked through
-- reflect) with every '-' removed; all original columns follow via `*`.
create table cars_rowid as
select
regexp_replace(reflect('java.util.UUID','randomUUID'), '-', '') as rowid,
*
from flinn.cars;
create or replace view cars_train as
select
rowid,
-----------------------------------------------------------------------------
-- Hivemall: Hive scalable Machine Learning Library
-----------------------------------------------------------------------------
-- CREATE DATABASE IF NOT EXISTS hivemall;
-- USE hivemall;
-- set hivevar:hivemall_jar=hdfs:///apps/hivemall/hivemall-with-dependencies.jar;
-- (Re)register the hivemall_version function, backed by the class
-- hivemall.HivemallVersionUDF loaded from the jar at ${hivemall_jar}.
-- The `set hivevar:hivemall_jar=...` line above is commented out, so the
-- hivemall_jar variable has to be defined by the caller before this runs.
DROP FUNCTION IF EXISTS hivemall_version;
CREATE FUNCTION hivemall_version as 'hivemall.HivemallVersionUDF' USING JAR '${hivemall_jar}';
# Create the HDFS directory for Hivemall and upload the dependency-bundled jar into it.
# NOTE(review): the upload is listed before the curl that downloads the same jar;
# if these lines are run top to bottom as one script, the jar must already exist
# locally or the download should come first -- confirm the intended order.
hadoop fs -mkdir -p /apps/hivemall
hadoop fs -put hivemall-0.3.2-3-with-dependencies.jar /apps/hivemall
# Download the v0.3.2-3 release jar and the matching DDL script (-L follows redirects,
# -O saves under the remote file name).
curl -L -O https://github.com/myui/hivemall/releases/download/v0.3.2-3/hivemall-0.3.2-3-with-dependencies.jar
curl -L -O https://raw.githubusercontent.com/myui/hivemall/v0.3.2-3/scripts/ddl/define-all-as-permanent.hive
-- Create (if needed) and switch to a dedicated database for this Hivemall version.
CREATE DATABASE IF NOT EXISTS hivemall0323;
USE hivemall0323;
-- Point hivemall_jar at the jar's HDFS location, then run the DDL script.
-- Presumably the sourced script reads ${hivemall_jar} in its CREATE FUNCTION
-- statements -- verify against the script.
set hivevar:hivemall_jar=hdfs:///apps/hivemall/hivemall-0.3.2-3-with-dependencies.jar;
source /home/hdfs/define-all-as-permanent.hive;
@actsasflinn
actsasflinn / Split.java
Last active December 16, 2016 02:10
PDF Split using PDFBox
import java.io.*;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.util.*;
import org.apache.pdfbox.exceptions.*;
import java.util.regex.*;
class Split {
public static void main(String[] args) throws IOException, COSVisitorException {
File input = new File("example.pdf");
@actsasflinn
actsasflinn / split.rb
Created December 16, 2016 02:09
PDF Split using PDFBox
# PDF split script driving the PDFBox Java classes from Ruby.
# `require 'java'` enables Java interop; the jars below are loaded from the
# current directory.
require 'java'
require './pdfbox-1.8.13.jar'
require './fontbox-1.8.13.jar'
require './commons-logging-1.2.jar'
# Open example.pdf through PDDocument.loadNonSeq, passing nil as the second argument.
input = java.io.File.new("example.pdf")
inputDocument = org.apache.pdfbox.pdmodel.PDDocument::loadNonSeq(input, nil)
# Text stripper instance and a fresh, empty document to hold output pages.
stripper = org.apache.pdfbox.util.PDFTextStripper.new
outputDocument = org.apache.pdfbox.pdmodel.PDDocument.new
# Load xgboost and Matrix, then work from the project directory.
library(xgboost)
library(Matrix)
setwd("~/Projects/xgboost_test")
# Fetch the UCI "adult" training data once; skip the download when the local
# file train_data already exists.
train_file = "train_data"
if (!file.exists(train_file)) {
download.file("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data", train_file)
}
test_file = "test_data"
if (!file.exists(test_file)) {