Skip to content

Instantly share code, notes, and snippets.

@jsanz
Last active January 19, 2018 18:52
Show Gist options
  • Select an option

  • Save jsanz/09556fee57de101f25b58c2acc8c809a to your computer and use it in GitHub Desktop.

Select an option

Save jsanz/09556fee57de101f25b58c2acc8c809a to your computer and use it in GitHub Desktop.
Python: Get all queries from your CARTO maps

Report maps and datasets of a CARTO account

See the report_xurxosanz.html report for an example of the output.

Usage

$ python carto_report.py -h
usage: carto_report.py [-h] [--user USER] [--organization ORGANIZATION]
                       [--base_url CARTO_BASE_URL] [--api_key CARTO_API_KEY]

Generates a HTML report of maps and datasets for a CARTO account

optional arguments:
  -h, --help            show this help message and exit
  --user USER           Set the name of the CARTO account (defaults to env
                        variable CARTO_USER)
  --organization ORGANIZATION
                        Set the name of the organization account (defaults to
                        env variable CARTO_ORG)
  --base_url CARTO_BASE_URL
                        Set the base URL. For example:
                        https://username.carto.com/ (defaults to env variable
                        CARTO_API_URL)
  --api_key CARTO_API_KEY
                        Api key of the account (defaults to env variable
                        CARTO_API_KEY)

Requirements

$ pip install carto jinja2
# coding: utf-8
import os
import csv
import re
import warnings
import logging
import argparse
warnings.filterwarnings('ignore')
from carto.auth import APIKeyAuthClient
from carto.visualizations import VisualizationManager
from carto.datasets import DatasetManager
from carto.maps import NamedMapManager, NamedMap
from jinja2 import Environment, FileSystemLoader
# Logger (better than print)
logging.basicConfig(
level=logging.INFO,
format=' %(asctime)s - %(levelname)s - %(message)s',
datefmt='%I:%M:%S %p')
logger = logging.getLogger()
# set input arguments
parser = argparse.ArgumentParser(
description='Generates a HTML report of maps and datasets for a CARTO account')
parser.add_argument('--user', type=str, dest='user',
default=os.getenv('CARTO_USER',False),
help='Set the name of the CARTO account' +
' (defaults to env variable CARTO_USER)')
parser.add_argument('--organization', type=str, dest='organization',
default=os.getenv('CARTO_ORG',False),
help='Set the name of the organization' +
' account (defaults to env variable CARTO_ORG)')
parser.add_argument('--base_url', type=str, dest='CARTO_BASE_URL',
default=os.getenv('CARTO_API_URL',False),
help='Set the base URL. For example:' +
' https://username.carto.com/ ' +
'(defaults to env variable CARTO_API_URL)')
parser.add_argument('--api_key', dest='CARTO_API_KEY',
default=os.getenv('CARTO_API_KEY',False),
help='Api key of the account' +
' (defaults to env variable CARTO_API_KEY)')
args = parser.parse_args()
# Authenticate to CARTO account
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
carto_key = APIKeyAuthClient(
args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
else:
logger.error('You need to provide valid credentials, run with -h parameter for details')
import sys
sys.exit(1)
# Helper functions
def getKey(obj):
return obj.updated_at
# Helper function to dive deep into the analysis objects
def getSourceQuery(analysis):
if analysis['type'] != 'source':
params = analysis['params']
if 'source' in params:
return getSourceQuery(params['source'])
elif 'left_source' in params and 'right_source' in params:
return [ getSourceQuery(params['left_source']) , getSourceQuery(params['right_source'])]
else:
return analysis['params']['query']
# Helper function to format queries
def removeBlanksCarriageReturns(string):
return re.sub('[ \t]{2,100}',' ', string.replace('\n',' '))
def flatten(lis):
"""Given a list, possibly nested to any level, return it flattened."""
new_lis = []
for item in lis:
if type(item) == type([]):
new_lis.extend(flatten(item))
else:
new_lis.append(item)
return new_lis
# ### Retrieve data from your account
vm = VisualizationManager(carto_key)
#retrieve all data from account's maps
logger.info('Getting all maps data...')
vizs = vm.all()
logger.info('Retrieved {} maps'.format(len(vizs)))
maps = [{
'id': viz.id,
'name': viz.name,
'created': viz.created_at,
#'tags': viz.tags,
'description': viz.description,
'url': viz.url
} for viz in sorted(vizs, key=getKey, reverse=True)]
dm = DatasetManager(carto_key)
#retrieve all data from account's maps
logger.info('Getting all datasets data...')
dsets = dm.all()
logger.info('Retrieved {} datasets'.format(len(dsets)))
tables = [{
'id': table.id,
'name': table.name,
'privacy' : table.privacy,
'created': table.created_at,
'tags': table.tags,
'description': table.description,
#'synchronization': table.synchronization
} for table in dsets]
nmm = NamedMapManager(carto_key)
logger.info('Getting all named maps inforamtion....')
for i, m in enumerate(maps):
nm = nmm.get('tpl_' + m['id'].replace('-','_'))
if nm:
m['named'] = nm
else:
logger.warn('Named map not found for map {}'.format(m['name']))
logger.info('Received {} named maps'.format(i+1))
for i, m in enumerate(maps):
try:
layergroup = m['named'].layergroup
# BUILDER templates
if 'analyses' in layergroup and len(layergroup['analyses']) > 0:
sources = [ getSourceQuery(analysis) for analysis in m['named'].layergroup['analyses']]
if len(sources) > 0:
m['sources'] = sources
else:
logger.warn('No sources found on analyses')
# OLD templates
elif len(layergroup['layers']) > 0:
m['sources'] = []
for layer in layergroup['layers']:
if 'options' in layer and 'sql' in layer['options']:
m['sources'].append(layer['options']['sql'])
# everything else
else:
logger.warn('No sources found for {} ({})'.format(m['name'],i))
except Exception as e:
logger.warn('error processing sources of map {} ({})'.format(i,e))
# ### Reporting
env = Environment( loader = FileSystemLoader('.') )
template = env.get_template('template.html')
# Flatten queries and remove duplicates
for i,m in enumerate(maps):
flat = flatten(m['sources'])
queries = []
for query in flat:
if query:
try:
queries.append(removeBlanksCarriageReturns(query))
except Exception as e:
logger.error(query)
m['sources_flat'] = list(set(queries))
# Print report
report_file = 'report_{}_.html'.format(args.user)
with open(report_file,'w') as r:
r.write(template.render({
'USER':args.user,
'maps': maps,
'datasets': tables
}))
logger.info('{} generated'.format(report_file))
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Account report</title>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
<style>
.code{
white-space: pre-wrap;
font-family: Courier New, Courier, monospace;
font-size: smaller;
display: block;
margin-bottom: 10px;
}
</style>
</head>
<body>
<div class="container-fluid">
<h1>xurxosanz maps and datasets</h1>
<p><a href="#datasets">jump to datasets</a></p>
<h2>Maps</h2>
<table class="table table-striped table-hover table-sm">
<thead>
<tr>
<th style="min-width: 300px;" scope="col">name</th>
<th scope="col">created</th>
<th style="min-width: 450px" scope="col">description</th>
<th scope="col">queries</th>
</tr>
</thead>
<tbody>
<tr>
<td scope="row"><a href="https://xurxosanz.carto.com/viz/159646ec-4280-11e4-8435-0e73339ffa50/map">Paseo FVB 21/09/14 with modulo</a></td>
<td>2014-09-22 17:44:21+00:00</td>
<td>**Recorrido**: Dar vueltas a la Ronda Interior y animar al resto de ciclistas que veáis o conozcáis que han salido esta mañana a apuntarse al **BICIPASEO**.</td>
<td>
<div>
<span class="code">SELECT * FROM (WITH route AS ( SELECT * FROM paseo WHERE route = 1 AND lap>0 ORDER BY id) SELECT 1 cartodb_id, ST_MakeLine(the_geom_webmercator) as the_geom_webmercator FROM route GROUP BY lap) AS wrapped_query WHERE <%= layer0 %>=1;</span>
<span class="code">SELECT * FROM (SELECT cartodb_id, the_geom_webmercator, ((random()*10-10) + id%3) id FROM paseo WHERE lap>0 AND route = 1 ) AS wrapped_query WHERE <%= layer2 %>=1;</span>
<span class="code">SELECT * FROM (SELECT * FROM paseo WHERE route = 1 and lap = 0) AS wrapped_query WHERE <%= layer1 %>=1;</span>
</div>
</td>
</tr>
<tr>
<td scope="row"><a href="https://xurxosanz.carto.com/viz/bd4f6085-ea4b-479a-a075-97df123c7db5/map">Bicing simple</a></td>
<td>2017-04-25 18:01:27+00:00</td>
<td></td>
<td>
<div>
<span class="code">SELECT * FROM barrio;</span>
<span class="code">SELECT * FROM bicing;</span>
</div>
</td>
</tr>
<tr>
<td scope="row"><a href="https://xurxosanz.carto.com/viz/bb13f0ee-292b-11e7-ba24-0e3ebc282e83/map">Bicing advanced</a></td>
<td>2017-04-24 20:22:25+00:00</td>
<td></td>
<td>
<div>
<span class="code">SELECT * FROM barrio;</span>
<span class="code">SELECT * FROM bicing;</span>
</div>
</td>
</tr>
<tr>
<td scope="row"><a href="https://xurxosanz.carto.com/viz/bcdcf856-1a6c-11e4-9aa6-0e230854a1cb/map">World weather</a></td>
<td>2014-08-02 17:45:05+00:00</td>
<td>Stations aggregated data</td>
<td>
<div>
<span class="code">SELECT * FROM (with obs as ( select o.station_id, (max(max_tmp)-32)*5/9 - (min(min_tmp)-32)*5/9 diff_tmp, sum(obs_count) obs_count from observations_month o group by o.station_id) select s.cartodb_id, s.station_id, s.the_geom, s.the_geom_webmercator, initcap(s.name) station, o.diff_tmp diff_tmp, to_char(diff_tmp,'FM999.0') diff_tmp_label, obs_count samples from stations s join obs o on s.station_id = o.station_id where o.obs_count>300 order by diff_tmp) AS wrapped_query WHERE <%= layer2 %>=1;</span>
<span class="code">SELECT * FROM (with obs as ( select o.station_id, sum(rain_count) rain_count, sum(obs_count) obs_count from observations_month o group by o.station_id) select s.cartodb_id, s.station_id, s.the_geom, s.the_geom_webmercator, initcap(s.name) station, o.rain_count rain_count, o.obs_count samples from stations s join obs o on s.station_id = o.station_id where o.obs_count>300 and rain_count>0 order by o.rain_count ) AS wrapped_query WHERE <%= layer3 %>=1;</span>
<span class="code">SELECT * FROM (with obs_max_tmp as ( select o.station_id, max(max_tmp) max_tmp, sum(obs_count) obs_count from observations_month o group by o.station_id) select s.cartodb_id, s.station_id, s.the_geom, s.the_geom_webmercator, initcap(s.name) station, (o.max_tmp-32)*5/9 max_tmp, to_char((o.max_tmp-32)*5/9,'FM999.0') max_tmp_label, obs_count samples from stations s join obs_max_tmp o on s.station_id = o.station_id where o.obs_count>300 order by max_tmp ) AS wrapped_query WHERE <%= layer0 %>=1;</span>
<span class="code">SELECT * FROM (with obs as ( select o.station_id, min(min_tmp) min_tmp, sum(obs_count) obs_count from observations_month o group by o.station_id) select s.cartodb_id, s.station_id, s.the_geom, s.the_geom_webmercator, initcap(s.name) station, (o.min_tmp-32)*5/9 min_tmp, to_char((o.min_tmp-32)*5/9,'FM999.0') min_tmp_label, obs_count samples from stations s join obs o on s.station_id = o.station_id where o.obs_count>300 order by min_tmp desc) AS wrapped_query WHERE <%= layer1 %>=1;</span>
</div>
</td>
</tr>
<tr>
<td scope="row"><a href="https://xurxosanz.carto.com/viz/b4615402-d14f-11e4-9262-0e4fddd5de28/map">Checkins</a></td>
<td>2015-03-23 11:28:18+00:00</td>
<td></td>
<td>
<div>
<span class="code">SELECT * FROM (SELECT *,to_timestamp(ts/1000) as date FROM checkins WHERE ts > 1426377600000) AS wrapped_query WHERE <%= layer0 %>=1;</span>
</div>
</td>
</tr>
<tr>
<td scope="row"><a href="https://xurxosanz.carto.com/viz/dae141c4-40b2-11e4-bae5-0e230854a1cb/map">Paseo FVB 21/09/14</a></td>
<td>2014-09-20 10:42:44+00:00</td>
<td>**Recorrido**: Dar vueltas a la Ronda Interior y animar al resto de ciclistas que veáis o conozcáis que han salido esta mañana a apuntarse al **BICIPASEO**.</td>
<td>
<div>
<span class="code">SELECT * FROM (WITH route AS ( SELECT * FROM paseo WHERE lap>0 AND route = 1 ), laps AS ( SELECT cartodb_id, the_geom_webmercator, id FROM route r1 UNION SELECT cartodb_id, the_geom_webmercator, CASE WHEN id > 25 THEN id - 25 ELSE id - 25 + 254 END id FROM route r2 UNION SELECT cartodb_id, the_geom_webmercator, CASE WHEN id > 50 THEN id - 50 ELSE id - 50 + 254 END id FROM route r3 UNION SELECT cartodb_id, the_geom_webmercator, CASE WHEN id > 75 THEN id - 75 ELSE id - 75 + 254 END id FROM route r4 UNION SELECT cartodb_id, the_geom_webmercator, CASE WHEN id > 100 THEN id - 100 ELSE id - 100 + 254 END id FROM route r5 UNION SELECT cartodb_id, the_geom_webmercator, CASE WHEN id > 125 THEN id - 125 ELSE id - 125 + 254 END id FROM route r6 UNION SELECT cartodb_id, the_geom_webmercator, CASE WHEN id > 150 THEN id - 150 ELSE id - 150 + 254 END id FROM route r7 UNION SELECT cartodb_id, the_geom_webmercator, CASE WHEN id > 175 THEN id - 175 ELSE id - 175 + 254 END id FROM route r8 UNION SELECT cartodb_id, the_geom_webmercator, CASE WHEN id > 200 THEN id - 200 ELSE id - 200 + 254 END id FROM route r9 UNION SELECT cartodb_id, the_geom_webmercator, CASE WHEN id > 225 THEN id - 225 ELSE id - 225 + 254 END id FROM route r10 ) SELECT cartodb_id, the_geom_webmercator, ((random()*10-10) + id) id FROM laps) AS wrapped_query WHERE <%= layer2 %>=1;</span>
<span class="code">SELECT * FROM (WITH route AS ( SELECT * FROM paseo WHERE route = 1 AND lap>0 ORDER BY id) SELECT 1 cartodb_id, ST_MakeLine(the_geom_webmercator) as the_geom_webmercator FROM route GROUP BY lap) AS wrapped_query WHERE <%= layer0 %>=1;</span>
<span class="code">SELECT * FROM (SELECT * FROM paseo WHERE route = 1 and lap = 0) AS wrapped_query WHERE <%= layer1 %>=1;</span>
</div>
</td>
</tr>
<tr>
<td scope="row"><a href="https://xurxosanz.carto.com/viz/76153e00-d151-11e4-b407-0e853d047bba/map">Checkins by User</a></td>
<td>2015-03-23 11:40:53+00:00</td>
<td></td>
<td>
<div>
<span class="code">SELECT * FROM (SELECT *, to_timestamp(ts/1000) as date FROM checkins WHERE ts > 1426377600000 and the_user in( '55005913b95307390369a8b8', '55060ab47451ec8e2e97de08', '55044ce55d39156d258a8605', '55036a685d39156d258a7bd3', '55084a607451ec8e2e9802f0' )) AS wrapped_query WHERE <%= layer0 %>=1;</span>
</div>
</td>
</tr>
<tr>
<td scope="row"><a href="https://xurxosanz.carto.com/viz/01221a44-1bf5-11e4-8240-0e73339ffa50/map">Storms 2008</a></td>
<td>2014-08-04 16:33:02+00:00</td>
<td>Thunders detected by weather stations over 2008</td>
<td>
<div>
<span class="code">select * from thunders;</span>
</div>
</td>
</tr>
</tbody>
</table>
<h2><a name="datasets"></a>Datasets</h2>
<table class="table table-striped table-hover table-sm">
<thead>
<tr>
<th scope="col">name</th>
<th scope="col">created</th>
<th scope="col">privacy</th>
<th scope="col">tags</th>
<th scope="col">description</th>
</tr>
</thead>
<tbody>
<t>
<td scope="row"><a href="https://xurxosanz.carto.com/tables/area_estadistica">area_estadistica</a></td>
<td>2017-04-30 14:57:31+00:00</td>
<td>PUBLIC</td>
<td>
</td>
<td></td>
</tr>
<t>
<td scope="row"><a href="https://xurxosanz.carto.com/tables/bicing">bicing</a></td>
<td>2017-04-24 21:10:35+00:00</td>
<td>PUBLIC</td>
<td>
</td>
<td></td>
</tr>
<t>
<td scope="row"><a href="https://xurxosanz.carto.com/tables/barrio">barrio</a></td>
<td>2017-04-24 19:58:16+00:00</td>
<td>PUBLIC</td>
<td>
</td>
<td></td>
</tr>
<t>
<td scope="row"><a href="https://xurxosanz.carto.com/tables/ne_10m_populated_places_simple">ne_10m_populated_places_simple</a></td>
<td>2017-02-23 11:38:49+00:00</td>
<td>PUBLIC</td>
<td>
Cultural datasets
</td>
<td>Most populated places.</td>
</tr>
<t>
<td scope="row"><a href="https://xurxosanz.carto.com/tables/world_borders">world_borders</a></td>
<td>2017-03-07 18:37:48+00:00</td>
<td>PUBLIC</td>
<td>
Administrative regions
</td>
<td>World countries borders.</td>
</tr>
<t>
<td scope="row"><a href="https://xurxosanz.carto.com/tables/paseo2">paseo2</a></td>
<td>2015-08-27 10:10:39+00:00</td>
<td>PUBLIC</td>
<td>
</td>
<td></td>
</tr>
<t>
<td scope="row"><a href="https://xurxosanz.carto.com/tables/checkins">checkins</a></td>
<td>2015-03-23 11:26:43+00:00</td>
<td>PUBLIC</td>
<td>
</td>
<td></td>
</tr>
<t>
<td scope="row"><a href="https://xurxosanz.carto.com/tables/paseo">paseo</a></td>
<td>2014-09-20 09:35:01+00:00</td>
<td>PUBLIC</td>
<td>
</td>
<td></td>
</tr>
<t>
<td scope="row"><a href="https://xurxosanz.carto.com/tables/observations_month">observations_month</a></td>
<td>2014-08-19 10:18:41+00:00</td>
<td>PUBLIC</td>
<td>
</td>
<td></td>
</tr>
<t>
<td scope="row"><a href="https://xurxosanz.carto.com/tables/thunders">thunders</a></td>
<td>2014-08-04 16:32:34+00:00</td>
<td>PUBLIC</td>
<td>
</td>
<td></td>
</tr>
<t>
<td scope="row"><a href="https://xurxosanz.carto.com/tables/stations">stations</a></td>
<td>2014-08-02 14:57:26+00:00</td>
<td>PUBLIC</td>
<td>
</td>
<td></td>
</tr>
</tbody>
</table>
</div>
</body>
</html>
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Account report</title>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
<style>
.code{
white-space: pre-wrap;
font-family: Courier New, Courier, monospace;
font-size: smaller;
display: block;
margin-bottom: 10px;
}
</style>
</head>
<body>
<div class="container-fluid">
<h1>{{ USER }} maps and datasets</h1>
<p><a href="#datasets">jump to datasets</a></p>
<h2>Maps</h2>
<table class="table table-striped table-hover table-sm">
<thead>
<tr>
<th style="min-width: 300px;" scope="col">name</th>
<th scope="col">created</th>
<th style="min-width: 450px" scope="col">description</th>
<th scope="col">queries</th>
</tr>
</thead>
<tbody>
{% for m in maps %}
<tr>
<td scope="row"><a href="{{m.url}}">{{ m.name }}</a></td>
<td>{{ m.created }}</td>
<td>{% if m.description != None %}{{ m.description }}{% endif %}</td>
<td>
<div>
{% for source in m.sources_flat %}
<span class="code">{{ source }};</span>
{% endfor %}</div>
</td>
</tr>
{% endfor %}
</tbody>
</table>
<h2><a name="datasets"></a>Datasets</h2>
<table class="table table-striped table-hover table-sm">
<thead>
<tr>
<th scope="col">name</th>
<th scope="col">created</th>
<th scope="col">privacy</th>
<th scope="col">tags</th>
<th scope="col">description</th>
</tr>
</thead>
<tbody>
{% for m in datasets %}
<t>
<td scope="row"><a href="https://{{ USER }}.carto.com/tables/{{m.name}}">{{ m.name }}</a></td>
<td>{{ m.created }}</td>
<td>{{ m.privacy }}</td>
<td>
{% for tag in m.tags %}
{{ tag }}{% if loop.nextitem is defined %}, {% endif %}
{% endfor %}
</td>
<td>{% if m.description != None %}{{ m.description }}{% endif %}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment