Skip to content

Instantly share code, notes, and snippets.

@macbre
Last active October 20, 2017 10:09
Show Gist options
  • Save macbre/0cd6a2d2549a51b9283d3a82fcb72471 to your computer and use it in GitHub Desktop.
Save macbre/0cd6a2d2549a51b9283d3a82fcb72471 to your computer and use it in GitHub Desktop.
SUS-3072 user queries analyzed
import logging
import re
from collections import Counter
from wikia.common.kibana import Kibana
USERS = [
'Sannse',
'Ripto22475',
'Sactage',
'FANDOM',
'FANDOMbot',
'TyBot',
'Ninjamaskbot',
'QATestsStaff',
'MStrikeGundam',
'A400',
'Decembirth',
'AkulakhanBot',
]
logging.basicConfig(level=logging.INFO)
def map_entry(entry):
# SELECT /* LinkBatch::doQuery (for Skin::preloadExistence) 107.175.71.179 - 4f27c44e-cac5-4d94-a4f1-aa7838fa2693 */ page_id,page_namespace,page_title,page_len,page_is_redirect,page_latest FROM `page` WHERE (page_namespace = '2' AND page_title = '107.175.71.179') OR (page_namespace = '3' AND page_title IN ('107.175.71.179','Sannse') )
# INSERT /* Revision::insertOn ImportStarter - d075c977-5ee6-4ddb-9ae8-8ed6bf0ba8b9 */ INTO `revision` (rev_id,rev_page,rev_text_id,rev_comment,rev_minor_edit,rev_user,rev_user_text,rev_timestamp,rev_deleted,rev_len,rev_parent_id,rev_sha1) VALUES (NULL,'2','2','Updating user page','0','8','Sannse','20100529005150','0','19','0','h9syi7i95u21mam7mil611hpcgzdk59')
query = entry['@message'].strip()
kind = query.split(' ')[0]
# print(query, kind)
if kind == 'SELECT':
query = query.split(' FROM ').pop()
elif kind in ['DELETE', 'UPDATE']:
query = query.split(' WHERE ').pop()
elif kind == 'INSERT':
query = query.split(' VALUES ').pop()
elif kind in ['BEGIN', 'COMMIT', 'SHOW']:
return None
# exclude queries that do have any user name in WHERE / VALUES part
if re.search('|'.join(map(lambda u: "'{}'".format(u), USERS)), query) is None:
return None
return {
"kind": kind,
"sql": query.encode('utf8'),
"method": entry['@context']['method'],
"db_name": entry['@context']['db_name']
}
def get_stats():
source = Kibana(period=86400, index_prefix='logstash-mediawiki-sql')
# collect SQL log entries
es_query = ' OR '.join(map(lambda x: '@message: "{}"'.format(x), USERS))
rows = source.query_by_string(es_query, limit=100000)
queries = filter(
lambda i: i is not None,
map(map_entry, rows)
)
# collect stats
stats = Counter()
context = {}
for query in queries:
key = query['method']
stats[key] += 1
if key not in context:
context[key] = query
return stats, context
if __name__ == "__main__":
stats, context = get_stats()
# print results
for key, cnt in stats.most_common():
meta = context[key]
meta['cnt'] = cnt
# print(meta)
print('{method} | {kind}: {sql} ({cnt:d} queries on {db_name})'.format(**meta))
wikia-common-kibana==2.2.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment