Last active
January 16, 2021 23:30
-
-
Save onefoursix/93f80f042018029310eb to your computer and use it in GitHub Desktop.
Python CM-API Example to pull Impala Query metrics
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
## *******************************************************************************************
## impalaQueries.py
##
## Getting Info on Impala Queries
##
## Connects to Cloudera Manager through the CM API, locates the Impala
## service of the configured cluster, and prints the Impala queries that
## one user executed during the last day.
##
## Usage: ./impalaQueries.py
##
## *******************************************************************************************

## ** imports *******************************
import sys
import pprint
from datetime import datetime, timedelta

from cm_api.api_client import ApiResource

## Format used when printing each query's start time.
## NOTE(review): %Z renders as an empty string for naive datetimes -- confirm
## whether the CM API returns timezone-aware start times.
fmt = '%Y-%m-%d %H:%M:%S %Z'
pp = pprint.PrettyPrinter(indent=4)

## ** Settings ******************************

## Cloudera Manager Host
cm_host = "toronto"
## Kept as a string because it is concatenated into the status message below
cm_port = "7180"

## Cloudera Manager login
cm_login = "admin"

## Cloudera Manager password
## NOTE: example credentials only; do not keep real passwords in source.
cm_password = "admin"

## Cluster Name
cluster_name = "Cluster 1"

## User whose Impala queries are reported
query_user = "mark"

## Statements that are skipped in the report: the exact statement
## "GET_SCHEMAS" and anything starting with "USE" or "SHOW".
skipped_statement = "GET_SCHEMAS"
skipped_prefixes = ("USE", "SHOW")

## *****************************************

impala_service = None

## Connect to CM
print("\nConnecting to Cloudera Manager at " + cm_host + ":" + cm_port)
api = ApiResource(server_host=cm_host, server_port=cm_port, username=cm_login, password=cm_password)

## Get the Cluster
cluster = api.get_cluster(cluster_name)

## Get the IMPALA service (the first service whose type is "IMPALA")
service_list = cluster.get_all_services()
for service in service_list:
    if service.type == "IMPALA":
        impala_service = service
        print("Located Impala Service: " + service.name)
        break

if impala_service is None:
    print("Error: Could not locate Impala Service")
    ## sys.exit instead of quit(): quit() is a helper injected by the site
    ## module and is not guaranteed to exist in every interpreter setup.
    sys.exit(1)

## I'll configure this example to use a window of one day
now = datetime.utcnow()
start = now - timedelta(days=1)

print("Looking for Impala queries executed by the user \"" + query_user + "\"")
filterStr = 'user = ' + query_user

impala_query_response = impala_service.get_impala_queries(start_time=start, end_time=now, filter_str=filterStr, limit=1000)
queries = impala_query_response.queries

for query in queries:
    statement = query.statement
    ## Skip the statements listed in the settings above
    if statement == skipped_statement or statement.startswith(skipped_prefixes):
        continue

    print('-------------------------------------')
    print("queryState : " + query.queryState)
    print("queryId: " + query.queryId)
    print("user: " + query.user)
    print("startTime: " + query.startTime.strftime(fmt))
    print("SQL: " + statement)
    pp.pprint(query.attributes)
    print('-------------------------------------')

print("done")
Your CM API code is really useful.
I am looking for similar code to monitor Hive queries, but the CM API's services.py doesn't have anything related to Hive. Have you done anything for Hive?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How can I get the database and table names in the same script, using the queryId? Are there any query attributes that show the database and table name?