Skip to content

Instantly share code, notes, and snippets.

@mfernest
Last active January 16, 2021 23:27
Show Gist options
  • Save mfernest/3721fce598691de427a20ccb127f0230 to your computer and use it in GitHub Desktop.
Save mfernest/3721fce598691de427a20ccb127f0230 to your computer and use it in GitHub Desktop.
Use CM API to report Hive usage
#!/usr/bin/python
## ********************************************************************************
## get-hive-usage-user.py
##
## Usage: ./get-hive-usage-user.py <user_name>
##
## Edit the settings below to connect to your Cluster
##
## ********************************************************************************
import sys
from datetime import datetime, timedelta
import pprint
from cm_api.api_client import ApiResource
## Connection settings -- edit these to match your Cloudera Manager deployment
## Host name of the Cloudera Manager server
cm_host = "mdonsky-1.gce.cloudera.com"
## Kept as a string: it is concatenated into the connection banner printed below
cm_port = "7180"
## NOTE(review): credentials are hard-coded in source; presumably placeholders --
## confirm before pointing this at a real deployment
cm_login = "admin"
cm_password = "admin"
## Cluster to inspect; matched against each cluster's displayName below
cluster_name = "Cluster 1"
## Parse command line: exactly one argument is expected, the user whose
## jobs will be reported
user_name = None
if len(sys.argv) == 2:
    user_name = sys.argv[1]
else:
    ## Single-argument print(...) produces the same output on Python 2 and 3
    print(" Usage: ./get-hive-usage-user.py <user_name>")
    ## sys.exit rather than quit: quit is an interactive-shell helper installed
    ## by the site module and is not meant to be used in programs
    sys.exit(1)
## Timestamp pattern: date, time and zone name
fmt = '%Y-%m-%d %H:%M:%S %Z'
## Pretty-printer configured with a 4-space indent
pp = pprint.PrettyPrinter(indent=4)
## Announce the target, then create the ApiResource handle used for all
## later API calls
print("\nConnecting to Cloudera Manager at " + ":".join((cm_host, cm_port)))
api = ApiResource(
    server_host=cm_host,
    server_port=cm_port,
    username=cm_login,
    password=cm_password,
)
## Find the cluster whose display name matches cluster_name
clusters = api.get_all_clusters()
## First match wins; None when no cluster carries that display name
cluster = next((c for c in clusters if c.displayName == cluster_name), None)
if cluster is None:
    print("\nError: Cluster '" + cluster_name + "' not found")
    ## sys.exit rather than quit: quit is an interactive-shell helper installed
    ## by the site module and is not meant to be used in programs
    sys.exit(1)
## Locate the YARN service among the cluster's services
service_list = cluster.get_all_services()
## First service of type "YARN" wins; None when the cluster has none
yarn_service = next((s for s in service_list if s.type == "YARN"), None)
if yarn_service is None:
    print("Error: Could not locate YARN Service")
    ## sys.exit rather than quit: quit is an interactive-shell helper installed
    ## by the site module and is not meant to be used in programs
    sys.exit(1)
## Query window: the 60 days ending at the current UTC time
now = datetime.utcnow()
start = now - timedelta(days=60)
## Restrict the query to applications submitted by the requested user
user_filter = 'user = ' + user_name
## Ask for at most 1000 applications and keep only the application list
yarn_apps = yarn_service.get_yarn_applications(
    start_time=start,
    end_time=now,
    filter_str=user_filter,
    limit=1000,
).applications
## Report every returned application that carries a Hive query
for yarn_app in yarn_apps:
    ## Get the Hive SQL; applications without this attribute are skipped
    hive_query_string = yarn_app.attributes.get("hive_query_string", None)
    if hive_query_string is None:
        continue
    print("\n-- YARN Job ID: " + yarn_app.applicationId + " --------------")
    print("YARN App Name: " + yarn_app.name)
    print("YARN User: " + yarn_app.user)
    print("State: " + yarn_app.state)
    print("Hive Query: " + hive_query_string)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment