Skip to content

Instantly share code, notes, and snippets.

@elliottcordo
elliottcordo / mahout_to_redis.pig
Last active August 29, 2015 13:57
Pig output transform: store Mahout item-similarity output in a Redis sorted set (zset)
-- Item similarity: load item-to-item similarity rows and hand them to the
-- pig-redis RedisStorer in 'zset' mode.
-- NOTE(review): this jar presumably provides com.hackdiary.pig.RedisStorer used below -- confirm.
REGISTER '/home/hduser/libs/pig-redis.jar';
-- Input is tab-delimited with three fields, all read as chararray
-- (rating is NOT parsed as a number here).
raw = LOAD '/user/movie_lens_rec_item_similarity'
USING PigStorage('\t') as (item1:chararray, item2:chararray, rating:chararray);
-- Reshape each row to (item1, (item2, rating)).
-- NOTE(review): presumably item1 becomes the Redis key and the tuple the
-- zset entry -- confirm the expected field order against RedisStorer.
exp_tuple = FOREACH raw GENERATE item1, TOTUPLE(item2, rating);
-- NOTE(review): 'dummy' looks like a placeholder location; the storer is
-- given the Redis host 192.168.56.1 directly -- confirm the path is ignored.
STORE exp_tuple INTO 'dummy' USING com.hackdiary.pig.RedisStorer('zset','192.168.56.1');
@elliottcordo
elliottcordo / gist:9bfd44c67863d5cfb250
Last active August 29, 2015 14:03
Neo4j REST client example
##http://neo4j-rest-client.readthedocs.org/en/latest/info.html
import string
from neo4jrestclient.client import GraphDatabase
gdb = GraphDatabase("http://localhost:7474/db/data/")
items = open("/Users/elliottcordo/Downloads/ml-100k/u.item")
for row in items:
row = filter(lambda x: x in string.printable, row) #there are special characters that will screw things up
@elliottcordo
elliottcordo / redis-timeseries-zet
Created August 8, 2014 11:59
Storing and working with time series in a Redis sorted set (zset)
127.0.0.1:6379> zadd mts-123456 20140302 5.6
(integer) 1
127.0.0.1:6379> zadd mts-123456 20140301 7
(integer) 1
127.0.0.1:6379> zadd mts-123456 20140304 3
(integer) 1
127.0.0.1:6379> zadd mts-123456 20140303 2.7
(integer) 1
127.0.0.1:6379> ZRANGEBYSCORE mts-123456 20140301 20140302
1) "7"
@elliottcordo
elliottcordo / gist:59d3c90b158331fe6ed7
Created August 13, 2014 20:21
python-redshift-pandas-statistics
import sys
import logging
import psycopg2
import pandas as pd
import pandas.io.sql as sqlio
import ConfigParser
import argparse
import statistics
from pandas import pivot_table, crosstab
from datetime import datetime
@elliottcordo
elliottcordo / intro_to_redis
Created August 26, 2014 15:32
Intro to Redis
127.0.0.1:6379> sadd rl1 matt
127.0.0.1:6379> sadd rl1 ben
127.0.0.1:6379> sadd rl1 judy
127.0.0.1:6379> smembers rl1
1) "judy"
2) "ben"
3) "matt"
@elliottcordo
elliottcordo / simple_redis_zinterstore
Created August 26, 2014 22:22
Another simple Redis example: combining sorted sets with ZUNIONSTORE and ZINTERSTORE
zadd lb-102-dist 546.2 eswar 400 elliott 311.2 marie 555 neel
zadd lb-103-dist 511.333 eswar 200 elliott 132 sue 888.4 jill
zadd demo-women 1 jill 1 sue 1 marie
zadd demo-men 1 elliott 1 eswar 1 neel
zunionstore tmp-1 2 lb-103-dist lb-102-dist
zinterstore tmp-1 2 demo-women tmp-1 WEIGHTS 0 1
zrevrange tmp-1 0 2
1. Get the max surrogate key for the dimension
-- Build a one-row temporary table holding the current maximum surrogate key
-- (type2_key) of d_type2, together with a creation date.
--drop table tmp_max_key_d_type2
create temporary table tmp_max_key_d_type2 as
-- max() over an empty d_type2 yields NULL; in that case max_key falls back to 1.
select case when max(type2_key)is null then 1 else max(type2_key) end as max_key,
-- created_date is trunc(getdate()).
-- NOTE(review): presumably Redshift, where this gives today's date without a time part -- confirm the target engine.
trunc(getdate())as created_date
from d_type2
2. Get the last record in the dimension for each selected natural key
create temp table temp_d_type2_old
as
@elliottcordo
elliottcordo / hive_update_strategy.hql
Created September 17, 2014 14:00
basic hive update strategy with dynamic partitioning
/* sales.csv
pizza,10.50,1,20140901
golf balls,4.44,1,20140901
hair gel,5,1,20140902
cream puffs,1.24,1,20140908
*/
/* sales2.csv
apples,4,1,20140908
frogs,3,1,20140908
@elliottcordo
elliottcordo / sql_alchemy_schema_migration.py
Last active August 29, 2015 14:07
SQLAlchemy schema migration
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session, sessionmaker
from sqlalchemy import create_engine, MetaData, Table, schema, Text, Index, select, func
# Database connection URLs.
# NOTE(review): names suggest to_db is the destination and from_db the
# source -- confirm against the code that uses them.
# NOTE(review): both currently point at the same local MySQL "test" database.
to_db="mysql+pymysql://root@localhost/test"
# Alternative to_db value, left commented out.
#to_db="postgres://admin:your_redshift_cluster/pwd"
from_db="mysql+pymysql://root@localhost/test"
def make_session(connection_string):
# download MovieLens data
wget --output-document=data/ml-100k.zip http://www.grouplens.org/system/files/ml-100k.zip
wget --output-document=data/ml-1m.zip http://www.grouplens.org/system/files/ml-1m.zip
wget --output-document=data/ml-10m.zip http://files.grouplens.org/papers/ml-10m.zip
cd data
# unzip data
unzip ml-100k.zip
unzip ml-1m.zip