Skip to content

Instantly share code, notes, and snippets.

module Inflector
extend self
class Inflections
getter :plurals, :singulars, :uncountables, :humans, :acronyms, :acronym_regex
def initialize
@plurals = {
/$/ => "s",
/s$/i => "s",
library(jug)
library(xgboost)
library(Matrix)
setwd("~/Projects/xgboost_test")
model <- xgb.load("xgboost1.save")
jug() %>%
get("/model/(?<age>.*)/(?<workclass>.*)/(?<education>.*)/(?<marital_status>.*)/(?<occupation>.*)/(?<relationship>.*)/(?<gender>.*)", function(req, res, err){
df <- data.frame(
@actsasflinn
actsasflinn / xgboost2.R
Created January 3, 2017 11:21
XGBoost using caret
# Load xgboost and caret, then switch to the project working directory.
library(xgboost)
library(caret)
setwd("~/Projects/xgboost_test")
# Local file name for the training data; it is downloaded only when the
# file is not already present on disk.
train_file = "train_data"
if (!file.exists(train_file)) {
download.file("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data", train_file)
}
test_file = "test_data"
if (!file.exists(test_file)) {
# Load xgboost and Matrix, then switch to the project working directory.
library(xgboost)
library(Matrix)
setwd("~/Projects/xgboost_test")
# Fetch the training data into "train_data" unless a file with that name
# already exists.
train_file = "train_data"
if (!file.exists(train_file)) {
download.file("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data", train_file)
}
test_file = "test_data"
if (!file.exists(test_file)) {
@actsasflinn
actsasflinn / split.rb
Created December 16, 2016 02:09
PDF Split using PDFBox
# JRuby script: load the Java bridge plus the PDFBox, FontBox and
# commons-logging jars via relative paths.
require 'java'
require './pdfbox-1.8.13.jar'
require './fontbox-1.8.13.jar'
require './commons-logging-1.2.jar'
# Open the source PDF; nil is passed as the second argument to loadNonSeq.
input = java.io.File.new("example.pdf")
inputDocument = org.apache.pdfbox.pdmodel.PDDocument::loadNonSeq(input, nil)
# A text stripper and a new, empty document.
# NOTE(review): presumably used to build the split output later in the script — confirm.
stripper = org.apache.pdfbox.util.PDFTextStripper.new
outputDocument = org.apache.pdfbox.pdmodel.PDDocument.new
@actsasflinn
actsasflinn / Split.java
Last active December 16, 2016 02:10
PDF Split using PDFBox
import java.io.*;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.util.*;
import org.apache.pdfbox.exceptions.*;
import java.util.regex.*;
class Split {
public static void main(String[] args) throws IOException, COSVisitorException {
File input = new File("example.pdf");
# Download the Hivemall jar and its permanent-function DDL script first:
# the HDFS upload below needs the jar to exist locally.
curl -L -O https://github.com/myui/hivemall/releases/download/v0.3.2-3/hivemall-0.3.2-3-with-dependencies.jar
curl -L -O https://raw.githubusercontent.com/myui/hivemall/v0.3.2-3/scripts/ddl/define-all-as-permanent.hive
# Create the target HDFS directory (no error if it already exists) and
# upload the jar into it.
hadoop fs -mkdir -p /apps/hivemall
hadoop fs -put hivemall-0.3.2-3-with-dependencies.jar /apps/hivemall
-- Create a dedicated database (if missing) and make it the current one.
CREATE DATABASE IF NOT EXISTS hivemall0323;
USE hivemall0323;
-- Set the hivemall_jar variable to the jar's HDFS location, then run the
-- DDL script from the local filesystem.
set hivevar:hivemall_jar=hdfs:///apps/hivemall/hivemall-0.3.2-3-with-dependencies.jar;
source /home/hdfs/define-all-as-permanent.hive;
-----------------------------------------------------------------------------
-- Hivemall: Hive scalable Machine Learning Library
-----------------------------------------------------------------------------
-- CREATE DATABASE IF NOT EXISTS hivemall;
-- USE hivemall;
-- set hivevar:hivemall_jar=hdfs:///apps/hivemall/hivemall-with-dependencies.jar;
-- Re-register hivemall_version: drop any existing definition, then create it
-- from the hivemall.HivemallVersionUDF class in the jar named by ${hivemall_jar}.
DROP FUNCTION IF EXISTS hivemall_version;
CREATE FUNCTION hivemall_version as 'hivemall.HivemallVersionUDF' USING JAR '${hivemall_jar}';
-- Copy flinn.cars into cars_rowid, prepending a rowid column built from a
-- random java.util.UUID with the dashes removed.
CREATE TABLE cars_rowid AS
SELECT
  regexp_replace(reflect('java.util.UUID', 'randomUUID'), '-', '') AS rowid,
  *
FROM flinn.cars;
create or replace view cars_train as
select
rowid,
@actsasflinn
actsasflinn / Vagrantfile
Created October 5, 2015 00:32
Hortonworks HDP Ambari Cluster Using Vagrant on Ubuntu
# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.vm.box = "hdp_vm"
config.vm.box_url = "http://opscode-vm-bento.s3.amazonaws.com/vagrant/virtualbox/opscode_centos-6.7_chef-provisionerless.box"
$script = <<SCRIPT
sudo yum -y install ntp
sudo yum -y install wget