Skip to content

Instantly share code, notes, and snippets.

@halfak
Last active August 29, 2015 14:09
Show Gist options
  • Save halfak/8967b1c29e474f355523 to your computer and use it in GitHub Desktop.
Save halfak/8967b1c29e474f355523 to your computer and use it in GitHub Desktop.
import json
import sys
"""
HEADERS = [
('index', 'index'),
('product/productId', 'product_id'),
('product/productTitle', 'product_title'),
('product/price', 'price'),
('review/userId', 'review_user_id'),
('review/profileName', 'review_profile_name'),
('review/score', 'review_score'),
('review/helpful', 'review_helpful'),
('review/ratings', 'review_ratings'),
('review/time', 'review_time'),
('review/summary', 'review_summary'),
('review/text', 'review_text')
]
"""
# Column mapping used to build the TSV output: each pair is
# (key looked up in the input JSON document, column name written to the
# header row). The order of the pairs is the order of the output columns.
#
# NOTE(review): 'helpfulessScore' looks like a typo for 'helpfulnessScore'.
# It is kept unchanged here because it is an emitted column name that
# downstream consumers may already depend on -- confirm before renaming.
HEADERS = [
    ('index',              'index'),
    ('product/price',      'price'),
    ('product/productId',  'productId'),
    ('review/helpful',     'helpfulessScore'),
    ('review/ratings',     'helpfulnessCount'),
    ('review/profileName', 'profileName'),
    ('review/score',       'score'),
    ('review/summary',     'summary'),
    ('review/text',        'text'),
    ('review/time',        'time'),
    ('review/userId',      'userId'),
]
def encode(val):
    """Return *val* rendered as a single tab-separated-values field.

    * ``None`` becomes the two-character NULL marker ``\\N``.
    * Strings have backslash, tab, newline and carriage return replaced by
      the escape sequences ``\\\\``, ``\\t``, ``\\n`` and ``\\r`` so that a
      field can never contain a raw column or row delimiter.
    * Any other value is converted with ``str()``.
    """
    if val is None:
        return "\\N"
    if isinstance(val, str):
        # The backslash must be escaped first; otherwise the backslashes
        # introduced by the later replacements would be doubled, and a
        # literal "\N" / "\t" / "\n" in the input would be indistinguishable
        # from the NULL marker or an escaped control character.
        return (
            val.replace("\\", "\\\\")
            .replace("\t", "\\t")
            .replace("\n", "\\n")
            # A raw carriage return can also be read as a line break.
            .replace("\r", "\\r")
        )
    return str(val)
# Header row: the output column names, in HEADERS order.
print("\t".join(column for _, column in HEADERS))

# Each non-blank line on stdin is parsed as one JSON document and written
# out as one TSV row.
for line in sys.stdin:
    # Skip blank lines (e.g. a trailing empty line at the end of the input);
    # json.loads would raise on them.
    if not line.strip():
        continue
    doc = json.loads(line)
    # doc[key] raises KeyError when a document lacks one of the HEADERS keys,
    # so an incomplete record fails loudly instead of yielding a short row.
    print("\t".join(encode(doc[key]) for key, _ in HEADERS))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment