Skip to content

Instantly share code, notes, and snippets.

@gruzovator
Created April 21, 2015 06:50
Show Gist options
  • Save gruzovator/00c7c33bc6593cce0080 to your computer and use it in GitHub Desktop.
Save gruzovator/00c7c33bc6593cce0080 to your computer and use it in GitHub Desktop.
PyHDFS test
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Script to test WebHDFS (pyhdfs python lib)
2015-04-21
"""
import argparse
import sys
import os
import re
import pyhdfs
import time
import logging
# With raiseExceptions disabled, the logging module silently ignores errors
# raised while a handler emits a record instead of printing a traceback.
# NOTE(review): presumably set to keep logging noise from imported libraries
# out of the script's output -- confirm this is still wanted.
logging.raiseExceptions = False
def _split_hdfs_url(remote):
    """Split an ``hdfs://<host><path>`` string into ``(host, path)``.

    The host is everything between ``hdfs://`` and the first ``/`` and must be
    non-empty; the path starts at that ``/``.

    Raises:
        ValueError: if *remote* does not have the ``hdfs://<host><path>`` form.
    """
    match = re.match(r'^hdfs://(?P<host>[^/]+)(?P<path>/.*)$', remote)
    if not match:
        raise ValueError('remote path doesn\'t have hdfs://<host><path> format')
    return match.group('host'), match.group('path')


def test(argv=None):
    """Upload or download one file through WebHDFS and print the elapsed time.

    Args:
        argv: list of command-line arguments to parse. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, which is the original
            behaviour.

    Raises:
        ValueError: if ``--remote`` is not of the form ``hdfs://<host><path>``.
        SystemExit: raised by argparse on missing or invalid arguments.

    Any error raised by the pyhdfs client during the transfer propagates to
    the caller unchanged.
    """
    parser = argparse.ArgumentParser(description='Script to test WebHDFS.')
    parser.add_argument('-l', '--local', required=True,
                        help='local file path')
    parser.add_argument('-r', '--remote', required=True,
                        help='HDFS path: hdfs://<host><path>, e.g. hdfs://hd0-dev/user/kmsearch_service')
    parser.add_argument('-o', '--operation', choices=['upload', 'download'], required=True,
                        help='operation to test')
    args = parser.parse_args(argv)

    local_file = args.local
    # Validate the remote URL before touching the network so a malformed
    # argument fails fast with a clear message.
    remote_host, remote_path = _split_hdfs_url(args.remote)

    # The timer deliberately includes client construction, as in the original.
    t0 = time.time()
    client = pyhdfs.HdfsClient(remote_host)
    if args.operation == 'upload':
        client.copy_from_local(local_file, remote_path, overwrite=True)
    elif args.operation == 'download':
        client.copy_to_local(remote_path, local_file)
    # Single parenthesised argument: prints identically on Python 2 and 3.
    print('Done. Elapsed time: {:.2f}s'.format(time.time() - t0))


# The name ``test`` matches pytest's default collection pattern; opt out so
# that a test run importing this module never triggers a real HDFS transfer.
test.__test__ = False
if __name__ == '__main__':
    # Top-level boundary: report any failure in one line instead of a
    # traceback, but still signal it to the calling shell.
    try:
        test()
    except Exception as ex:
        # Diagnostics go to stderr, and the non-zero exit status lets
        # wrappers and cron jobs detect that the transfer did not succeed.
        sys.stderr.write('Test execution error: {}\n'.format(ex))
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment