Skip to content

Instantly share code, notes, and snippets.

@klinkin
Last active August 29, 2015 14:17
Show Gist options
  • Save klinkin/bad2d9311ce060499146 to your computer and use it in GitHub Desktop.
Save klinkin/bad2d9311ce060499146 to your computer and use it in GitHub Desktop.
Lab2 mapper.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
from time import time
import happybase
# Records are stored only when ``uid % 256 == MY_N``.
MY_N = 138


def _parse_record(line):
    """Parse one tab-separated ``uid<TAB>timestamp<TAB>url`` input line.

    Returns a ``(uid, timestamp_ms, url)`` tuple where ``uid`` is an int,
    ``timestamp_ms`` is the timestamp multiplied by 1000 and truncated to an
    int, and ``url`` is the third field without its line terminator.

    Returns ``None`` when the line should be discarded: blank line, wrong
    number of fields, non-integer uid, non-numeric or non-finite timestamp,
    or blank url.
    """
    # Remove only the line terminator.  A full strip() would also eat leading
    # tabs and silently shift the fields of a record with an empty uid.
    fields = line.rstrip("\r\n").split("\t")
    if len(fields) != 3:
        return None
    uid, timestamp, url = fields
    if not url.strip():
        return None
    try:
        # int() raises ValueError for nan and OverflowError for +/-inf, so the
        # millisecond conversion has to live inside the try as well.
        return int(uid), int(float(timestamp) * 1000), url
    except (ValueError, OverflowError):
        # Malformed uid/timestamp: silently discard this line.
        return None


connection = happybase.Connection('172.31.25.248')
try:
    table = connection.table('mikhail.klimin')
    # Input comes from STDIN (standard input), one record per line.
    for line in sys.stdin:
        record = _parse_record(line)
        if record is None:
            continue
        uid, timestamp_ms, url = record
        if uid % 256 == MY_N:
            # Row key is the uid as a string; the cell is written with the
            # record's own timestamp (in milliseconds) rather than "now".
            table.put(str(uid), {'data:url': url}, timestamp=timestamp_ms)
finally:
    # Release the connection even if reading or writing fails midway.
    connection.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment