Last active
June 13, 2019 02:19
-
-
Save onefoursix/3c9390fce699cfaf1b0d to your computer and use it in GitHub Desktop.
Example of how to get info on Hive YARN jobs for a specific Sentry user using the Cloudera Manager API
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
## ********************************************************************************
## get-hive-yarn-jobs-for-sentry-user.py
##
## Example of how to retrieve info on YARN Hive jobs for a given Sentry user
## using the Cloudera Manager API
##
## Usage: ./get-hive-yarn-jobs-for-sentry-user.py <sentry_user_name>
##
## <sentry_user_name> is the name of the user you want to retrieve info for
##
## for example: ./get-hive-yarn-jobs-for-sentry-user.py mark
##
## Edit the settings in the script to connect to your Cluster
##
## The script assumes one YARN Service exists on the Cluster
##
## ********************************************************************************
import sys | |
import time | |
import pprint | |
from datetime import datetime, timedelta | |
from cm_api.api_client import ApiResource | |
## Settings to connect to the cluster
cm_host = "CM_HOST"
cm_port = "7180"
cm_login = "CM_USER"
cm_password = "CM_PASSWORD"
cluster_name = "CLUSTER_NAME"

## Used for formatting dates (not referenced by the code below)
fmt = '%Y-%m-%d %H:%M:%S %Z'

## Change 'False' to 'True' to see all of the YARN attributes available for each job;
## if you see ones you want to print, read them in describe_job() the same way
## 'hive_query_string' is read there
show_all_attributes = False

# pretty printer for printing JSON attribute lists
pp = pprint.PrettyPrinter(indent=4)


def find_cluster(api, name):
    """Return the cluster whose displayName equals *name*, or None if there is none.

    *api* is any object exposing get_all_clusters(); the first match wins.
    """
    for candidate in api.get_all_clusters():
        if candidate.displayName == name:
            return candidate
    return None


def find_yarn_service(cluster):
    """Return the first service of type "YARN" on *cluster*, or None if there is none."""
    for service in cluster.get_all_services():
        if service.type == "YARN":
            return service
    return None


def describe_job(yarn_app, sentry_subject):
    """Return the report lines for one YARN job, in print order.

    The job id, app name, YARN user and Sentry user are always included.
    The Hive query and CPU milliseconds are added only when the job carries
    those attributes.
    """
    lines = [
        "\n-- YARN Job ID: " + yarn_app.applicationId + " --------------",
        "YARN App Name: " + yarn_app.name,
        "YARN User: " + yarn_app.user,
        "Sentry User: " + sentry_subject,
    ]

    ## .get() rather than [] so a job without the attribute is simply
    ## reported without that line instead of raising KeyError
    hive_query_string = yarn_app.attributes.get('hive_query_string')
    if hive_query_string is not None:
        lines.append("Hive Query: " + hive_query_string)

    cpu_millis = yarn_app.attributes.get('cpu_milliseconds')
    if cpu_millis is not None:
        ## str() keeps the concatenation safe if the value is not already text
        lines.append("CPU Millis: " + str(cpu_millis))

    return lines


def report_jobs_for_sentry_user(yarn_apps, user_name):
    """Print every job in *yarn_apps* whose Sentry subject is *user_name*.

    Jobs that belong to another Sentry user, or that have no
    'hive_sentry_subject_name' attribute at all, are skipped; scanning
    continues with the next job. Returns the number of jobs printed.
    """
    printed = 0
    for yarn_app in yarn_apps:
        if show_all_attributes:
            pp.pprint(yarn_app.attributes)

        ## Get the Sentry user for the job
        sentry_subject = yarn_app.attributes.get('hive_sentry_subject_name')

        ## Skip (do not stop at) jobs that are not for the requested user
        if sentry_subject is None or sentry_subject != user_name:
            continue

        for line in describe_job(yarn_app, sentry_subject):
            print(line)
        printed += 1
    return printed


def main(argv=None):
    """Run the report and return the process exit status (0 on success, 1 on error).

    *argv* defaults to sys.argv and must be [script_name, sentry_user_name].
    """
    if argv is None:
        argv = sys.argv

    ## Get command line args
    if len(argv) != 2:
        print(" Usage: ./get-hive-yarn-jobs-for-sentry-user.py <sentry_user_name>")
        return 1
    sentry_user_name = argv[1]

    ## Connect to CM
    print("\nConnecting to Cloudera Manager at " + cm_host + ":" + cm_port)
    api = ApiResource(server_host=cm_host, server_port=cm_port, username=cm_login, password=cm_password)

    ## Get Cluster
    cluster = find_cluster(api, cluster_name)
    if cluster is None:
        print("\nError: Cluster '" + cluster_name + "' not found")
        return 1

    ## Get YARN Service
    yarn_service = find_yarn_service(cluster)
    if yarn_service is None:
        print("Error: Could not locate YARN Service")
        return 1

    ## This example uses a window of one day, ending now (UTC)
    now = datetime.utcnow()
    start = now - timedelta(days=1)

    ## Use the hive user to select only the Hive jobs;
    ## the sentry_user_name is matched after the results come back
    filter_str = 'user = hive'

    ## Get the YARN Hive jobs
    yarn_apps_response = yarn_service.get_yarn_applications(
        start_time=start, end_time=now, filter_str=filter_str, limit=1000)

    ## Print the jobs for the requested Sentry user, then a closing rule
    report_jobs_for_sentry_user(yarn_apps_response.applications, sentry_user_name)
    print("\n-----------------------------------------")
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment