# Source: GitHub gist reinout/90773bc811bade5f5e020a92516b3f59
# Author: @reinout
# Created: April 28, 2016 12:41
"""Script to report on geoserver layer usage.

I've run it on p-web-map-d9 like this::

    cd /var/log/nginx
    mkdir /tmp/logs
    cp access.log* /tmp/logs/
    cd /tmp/logs
    gunzip *.gz
    cat access.log* |grep -v ^127.0.0.1 > alles.txt
    rm access.log*
    grep -v HEAD alles.txt > alles_zonder_HEAD.txt
    cat alles_zonder_HEAD.txt |grep -i wms|grep geoserver > geoserver_wms.txt
    cat geoserver_wms.txt |grep -i layers > geoserver_wms_layers.txt
    python extract.py

"""
from collections import defaultdict
from pprint import pprint
import re
import sys
# Pattern for one access-log line containing a geoserver WMS request.
#
# Named groups:
#   category -- the single path segment between ``/geoserver/`` and ``/wms``.
#   layers   -- the raw value of the ``LAYERS=`` query parameter, i.e.
#               everything up to the next ``&`` (still comma-separated).
#
# The pattern is compiled with re.VERBOSE, so the newlines inside the string
# are ignored. It is a raw string so that ``\&`` reaches the regex engine
# as-is instead of being an invalid *string* escape (a SyntaxWarning on
# recent Python versions).
#
# NOTE(review): matching is case-sensitive, so only upper-case ``LAYERS=``
# is recognised, while the shell pipeline in the module docstring filters
# with ``grep -i layers``. Lines using ``layers=`` are silently skipped --
# confirm that this is intended.
REGEX = re.compile(r"""
.*
/geoserver/
(?P<category>[^/]+)
/wms
.*
LAYERS=
(?P<layers>[^\&]+)
.*
""", re.VERBOSE)
def _count_layers_per_category(path):
    """Return ``{category: {layer_name: hits}}`` for the log file at *path*.

    Every line is searched with ``REGEX``; lines that do not match are
    skipped. The ``layers`` group is split on commas and each resulting
    layer name is counted once per occurrence.
    """
    categories = defaultdict(lambda: defaultdict(int))
    # The context manager closes the log file even if a line fails to parse.
    with open(path) as log_file:
        for line in log_file:
            match = REGEX.search(line)
            if not match:
                continue
            category = categories[match.group('category')]
            for layer_name in match.group('layers').split(','):
                category[layer_name] += 1
    return categories


def _print_report(categories):
    """Print one section per category, busiest category and layer first."""
    # Sorting (count, name) tuples in reverse puts the highest count first;
    # equal counts are ordered by name, descending.
    ranked_categories = sorted(
        ((sum(layers.values()), category_name)
         for category_name, layers in categories.items()),
        reverse=True)
    for total_hits, category_name in ranked_categories:
        title = "Category '%s' (%s hits)" % (category_name, total_hits)
        print(title)
        print("=" * len(title))
        print('')
        ranked_layers = sorted(
            ((count, layer_name)
             for layer_name, count in categories[category_name].items()),
            reverse=True)
        for count, layer_name in ranked_layers:
            print(" %s\t%s" % (count, layer_name))
        # Two blank lines separate consecutive category sections.
        print('')
        print('')


def main():
    """Report geoserver WMS layer usage found in ``geoserver_wms_layers.txt``.

    The file is read from the current working directory. For every line
    matching ``REGEX``, each layer named in the request is counted under the
    request's category. The report is printed to standard output: categories
    in descending order of total hits, and within each category the layers in
    descending order of hits.
    """
    categories = _count_layers_per_category('geoserver_wms_layers.txt')
    _print_report(categories)
# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
# End of gist content.