Skip to content

Instantly share code, notes, and snippets.

@harry-wood
Last active January 18, 2026 02:06
Show Gist options
  • Select an option

  • Save harry-wood/cc3da38ab46ecede058ba3ad237536e6 to your computer and use it in GitHub Desktop.

Select an option

Save harry-wood/cc3da38ab46ecede058ba3ad237536e6 to your computer and use it in GitHub Desktop.
Bash script to create the notes density map as seen at https://harrywood.co.uk/maps/notes/density.html, and an initial step as a Ruby script which converts the notes planet dump XML into an SQLite file.
#!/usr/bin/env bash
set -euo pipefail
# Bash script to create the notes density map as seen at https://harrywood.co.uk/maps/notes/density.html
#
# First we download the planet of notes data and convert it to SQLite, then back to geojson files which we render with GDAL tools.
# The process is repeated first for closed notes (green), then for open notes (red) to create two tile layers.

# CONFIG
DB="notes.db"
TABLE="notes"
OUTDIR="tiles"      # NOTE(review): never referenced below — tiles actually land in tiles_closed/tiles_open
TMPDIR="tmp_rasters"
mkdir -p "$OUTDIR" "$TMPDIR"

echo "=== Downloading latest notes planet file ==="
# -N (timestamping): re-running the script refreshes the dump in place.
# Without it, wget saves a duplicate as planet-notes-latest.osn.bz2.1, .2, ...
# and the bzcat below keeps reading the stale original copy.
wget -N https://planet.openstreetmap.org/notes/planet-notes-latest.osn.bz2

echo "=== Loading into SQLite with xml_to_sqlite.rb ==="
# xml_to_sqlite.rb reads the decompressed XML on stdin and writes notes.db
bzcat planet-notes-latest.osn.bz2 | ruby xml_to_sqlite.rb
# Export SQLite table to GeoJSON
echo "=== Exporting to GeoJSON ==="
# MakePoint(lon, lat, 4326) builds a WGS84 point geometry from the lat/lon
# columns (SQLite SQL dialect with spatial functions). Closed notes are those
# with a closed_at timestamp.
ogr2ogr -f GeoJSON notes_closed.geojson "$DB" \
-sql "SELECT id, lat, lon, MakePoint(lon, lat, 4326) AS geometry FROM $TABLE WHERE closed_at IS NOT NULL"
echo "=== Generating chunk rasters ==="
CHUNK=0
# World grid in 60°×60° chunks
# Rasterising the whole world in one gdal_grid call is too memory-hungry, so
# the globe is processed as 6×3 = 18 chunks and merged afterwards.
for LON_MIN in -180 -120 -60 0 60 120; do
LON_MAX=$((LON_MIN+60))
for LAT_MIN in -90 -30 30; do
LAT_MAX=$((LAT_MIN+60))
CHUNK=$((CHUNK+1))
OUTFILE="$TMPDIR/chunk_closed_${CHUNK}.tif"
echo " → Chunk $CHUNK : lon $LON_MIN to $LON_MAX , lat $LAT_MIN to $LAT_MAX"
# gdal_grid 'count' algorithm: each output cell holds the number of points
# within radius 0.004° of the cell centre — i.e. a point-density raster.
# -l SELECT: presumably the layer name ogr2ogr gives the -sql output — TODO confirm.
# NOTE(review): -outsize 12000x6000 over a square 60°×60° extent gives
# non-square pixels; assumed intentional to match the published map — verify.
gdal_grid \
-zfield id \
-a count:radius=0.004 \
-txe $LON_MIN $LON_MAX -tye $LAT_MIN $LAT_MAX \
-outsize 12000 6000 \
-l SELECT \
-of GTiff \
"notes_closed.geojson" "$OUTFILE" || echo " (no points in this region)"
done
done
echo "=== Merging chunks ==="
gdal_merge.py -o "$TMPDIR/closed_notes_density.tif" -of GTiff -co COMPRESS=DEFLATE -co TILED=YES $TMPDIR/chunk_closed_*.tif
echo "=== Colorizing ==="
# Color ramp for gdaldem
# Format per line: "pixel-value R G B A" — a green ramp: transparent at zero
# notes, dark green for low counts, brightening towards white for dense areas.
cat > "$TMPDIR/colors.txt" <<'EOF'
0 0 0 0 0
1 0 100 0 180
3 0 150 0 255
25 0 255 0 255
80 200 255 200 255
500 255 255 255 255
EOF
# Mark zero-count cells as nodata so empty regions stay transparent in tiles
gdal_edit.py -a_nodata 0 "$TMPDIR/closed_notes_density.tif"
gdaldem color-relief -alpha "$TMPDIR/closed_notes_density.tif" "$TMPDIR/colors.txt" "$TMPDIR/world_color.tif"
echo "=== Reprojecting to Web Mercator ==="
rm -f "$TMPDIR/world_merc.tif"
gdalwarp -t_srs EPSG:3857 -r bilinear -co COMPRESS=DEFLATE -co TILED=YES "$TMPDIR/world_color.tif" "$TMPDIR/world_merc.tif"
rm -f "$TMPDIR/world_merc_trimmed.tif"
# Trim: full Mercator width, but clipped short of the poles (±11188890 m
# rather than the full ±20037508 square).
gdalwarp \
-te -20037508.34 -11188890.0 20037508.34 11188890.0 \
-co TILED=YES -co COMPRESS=DEFLATE \
"$TMPDIR/world_merc.tif" "$TMPDIR/world_merc_trimmed.tif"
echo "=== Generating tiles (zoom 0–9) ==="
# --exclude: skip writing fully transparent tiles
gdal2tiles.py -z 0-9 --exclude "$TMPDIR/world_merc_trimmed.tif" "tiles_closed"
echo "=== Done closed notes tiles: 'tiles_closed' ==="
echo "=== Exporting open notes to GeoJSON ==="
# Same export as the closed-notes pass above, but selecting notes that are
# still open (closed_at IS NULL).
ogr2ogr -f GeoJSON notes-open.geojson "$DB" \
-sql "SELECT id, lat, lon, MakePoint(lon, lat, 4326) AS geometry FROM $TABLE WHERE closed_at IS NULL"
echo "=== Generating chunk rasters ==="
CHUNK=0
# World grid in 60°×60° chunks
# Same 6×3 chunking as the closed-notes pass, to keep gdal_grid memory bounded.
for LON_MIN in -180 -120 -60 0 60 120; do
LON_MAX=$((LON_MIN+60))
for LAT_MIN in -90 -30 30; do
LAT_MAX=$((LAT_MIN+60))
CHUNK=$((CHUNK+1))
OUTFILE="$TMPDIR/chunk_open_${CHUNK}.tif"
echo " → Chunk $CHUNK : lon $LON_MIN to $LON_MAX , lat $LAT_MIN to $LAT_MAX"
# Point-density raster: count of notes within 0.004° of each cell centre.
# -l SELECT: presumably the layer name ogr2ogr gives the -sql output — TODO confirm.
gdal_grid \
-zfield id \
-a count:radius=0.004 \
-txe $LON_MIN $LON_MAX -tye $LAT_MIN $LAT_MAX \
-outsize 12000 6000 \
-l SELECT \
-of GTiff \
"notes-open.geojson" "$OUTFILE" || echo " (no points in this region)"
done
done
echo "=== Merging chunks ==="
# Fix: '-o' and its output filename must be separate arguments. gdal_merge.py's
# argument parser matches the literal string '-o', so the fused form
# '-o<file>' is not recognised as the output flag and is treated as an input
# raster instead. (The closed-notes invocation earlier in this script already
# uses the separated form.)
gdal_merge.py -o "$TMPDIR/open_notes_density.tif" -of GTiff -co COMPRESS=DEFLATE -co TILED=YES $TMPDIR/chunk_open_*.tif

echo "=== Colorizing ==="
# Color ramp for gdaldem
# Format per line: "pixel-value R G B A" — a red ramp for open notes; note the
# lower thresholds than the green (closed) ramp, as open notes are sparser.
cat > "$TMPDIR/colors_open.txt" <<'EOF'
0 0 0 0 0
1 120 0 0 180
3 170 0 0 255
8 255 0 0 255
15 255 200 200 255
500 255 255 255 255
EOF
# Mark zero-count cells as nodata so empty regions stay transparent in tiles
gdal_edit.py -a_nodata 0 "$TMPDIR/open_notes_density.tif"
gdaldem color-relief -alpha "$TMPDIR/open_notes_density.tif" "$TMPDIR/colors_open.txt" "$TMPDIR/world_color_open.tif"

echo "=== Reprojecting to Web Mercator ==="
rm -f "$TMPDIR/world_merc_open.tif"
gdalwarp -t_srs EPSG:3857 -r bilinear -co COMPRESS=DEFLATE -co TILED=YES "$TMPDIR/world_color_open.tif" "$TMPDIR/world_merc_open.tif"
rm -f "$TMPDIR/world_merc_open_trimmed.tif"
# Trim: full Mercator width, clipped short of the poles (same extents as the
# closed-notes pass).
gdalwarp \
-te -20037508.34 -11188890.0 20037508.34 11188890.0 \
-co TILED=YES -co COMPRESS=DEFLATE \
"$TMPDIR/world_merc_open.tif" "$TMPDIR/world_merc_open_trimmed.tif"

echo "=== Generating tiles (zoom 0–9) ==="
OUTDIR_OPEN="tiles_open"
# Use the OUTDIR_OPEN variable (it was assigned but unused before)
gdal2tiles.py -z 0-9 --exclude "$TMPDIR/world_merc_open_trimmed.tif" "$OUTDIR_OPEN"
echo "=== Done! Tiles are in 'tiles_open' ==="
#!/usr/bin/env ruby
# frozen_string_literal: true
#
# Ruby script to copy OpenStreetMap notes dump XML into an SQLite database.
#
# Streams XML from STDIN, extracts <note> ... </note> blocks, and writes them into a SQLite DB.
# Tries to use Nokogiri if available; falls back to REXML (stdlib) for parsing each note chunk.
#
# Usage:
# bzcat planet-notes-latest.osn.bz2 | ruby xml_to_sqlite.rb
#
require 'sqlite3'
# Prefer Nokogiri for parsing when the gem is installed; otherwise fall back
# to the stdlib REXML parser, which only needs to be loaded in that case.
nokogiri_available = true
begin
  require 'nokogiri'
rescue LoadError
  nokogiri_available = false
  require 'rexml/document'
end
require 'stringio'
# Output database path (created in the current working directory).
DB_FILE = 'notes.db'
# --- Initialize DB ---
# Schema: one row per note in `notes`, with its comments in a child table
# keyed by note_id. CREATE TABLE IF NOT EXISTS makes re-runs safe.
db = SQLite3::Database.new(DB_FILE)
db.execute_batch <<~SQL
PRAGMA foreign_keys = ON;
CREATE TABLE IF NOT EXISTS notes (
id INTEGER PRIMARY KEY,
lat REAL,
lon REAL,
created_at TEXT,
closed_at TEXT
);
CREATE TABLE IF NOT EXISTS comments (
id INTEGER PRIMARY KEY AUTOINCREMENT,
note_id INTEGER,
action TEXT,
timestamp TEXT,
uid INTEGER,
user TEXT,
text TEXT,
FOREIGN KEY(note_id) REFERENCES notes(id)
);
SQL
# Prepared statements, reused for every insert.
# INSERT OR REPLACE means a re-run refreshes an existing note row in place.
# NOTE(review): comments uses a plain INSERT, so re-running the import over an
# existing notes.db will duplicate comment rows — confirm whether intended.
insert_note_stmt = db.prepare(
"INSERT OR REPLACE INTO notes (id, lat, lon, created_at, closed_at) VALUES (?, ?, ?, ?, ?)"
)
insert_comment_stmt = db.prepare(
"INSERT INTO comments (note_id, action, timestamp, uid, user, text) VALUES (?, ?, ?, ?, ?, ?)"
)
# Helper: parse a single <note>...</note> XML string and insert it into the DB.
#
# xml_str             - String containing one complete <note>...</note> fragment.
# nokogiri_available  - Boolean selecting the Nokogiri or REXML parse path.
# insert_note_stmt    - prepared INSERT for the notes table.
# insert_comment_stmt - prepared INSERT for the comments table.
#
# Attribute values are bound as strings; SQLite's type affinity coerces them
# into the INTEGER/REAL/TEXT columns. Any parse/insert failure is logged as a
# warning so one bad note block cannot abort the whole import.
#
# Fix: the progress counter (@count) was previously only incremented on the
# REXML fallback path, so Nokogiri runs logged no progress at all; it now
# counts every processed note regardless of parser.
def process_note_xml(xml_str, nokogiri_available, insert_note_stmt, insert_comment_stmt)
  if nokogiri_available
    doc = Nokogiri::XML(xml_str) { |cfg| cfg.noblanks } # remove blank nodes
    note_el = doc.at_xpath('/note')
    return unless note_el
    note_id = note_el['id']
    insert_note_stmt.execute(note_id, note_el['lat'], note_el['lon'],
                             note_el['created_at'], note_el['closed_at'])
    note_el.xpath('comment').each do |c|
      text = c.text ? c.text.strip : nil
      insert_comment_stmt.execute(note_id, c['action'], c['timestamp'],
                                  c['uid'], c['user'], text)
    end
  else
    # REXML expects a complete XML document; wrap the fragment in a dummy root.
    doc = REXML::Document.new("<root>#{xml_str}</root>")
    note_el = doc.root.elements['note']
    return unless note_el
    attrs = note_el.attributes
    note_id = attrs['id']
    insert_note_stmt.execute(note_id, attrs['lat'], attrs['lon'],
                             attrs['created_at'], attrs['closed_at'])
    note_el.elements.each('comment') do |c|
      # REXML::Element#text may be nil for an empty comment element
      text = (c.text || '').strip
      insert_comment_stmt.execute(note_id, c.attributes['action'], c.attributes['timestamp'],
                                  c.attributes['uid'], c.attributes['user'], text)
    end
  end
  # Progress logging every 100 notes, on both parser paths.
  @count += 1
  puts "#{@count} Inserting note #{note_id}" if @count % 100 == 0
rescue => e
  warn "Warning: failed to parse/insert a note block: #{e.class}: #{e.message}"
end
# Running tally of processed notes, read by process_note_xml for progress logs.
@count = 0
# --- Stream from STDIN, cutting the input at each closing </note> tag ---
# The planet dump is far too large to parse as one document, so the raw bytes
# are buffered and each complete <note>...</note> fragment is parsed on its own.
buffer = ''.dup
close_tag = '</note>'
# Read in binary so any encoding survives untouched; the XML parser deals with
# encoding declarations itself.
STDIN.binmode
until (chunk = STDIN.read(16 * 1024)).nil?
  buffer << chunk
  # Drain every complete </note>-terminated fragment currently in the buffer.
  while (close_pos = buffer.index(close_tag))
    fragment = buffer.slice!(0...(close_pos + close_tag.length))
    # Drop surrounding whitespace so the fragment is a bare <note>...</note>.
    fragment.strip!
    next if fragment.empty?
    # Discard any leading bytes before '<note' (XML declaration, root open tag,
    # or other stray top-level content).
    open_pos = fragment.index('<note')
    fragment = fragment[open_pos..-1] if open_pos && open_pos > 0
    process_note_xml(fragment, nokogiri_available, insert_note_stmt, insert_comment_stmt)
  end
end
# After EOF the buffer may still hold bytes that never reached a closing
# </note> tag. Salvage one trailing complete note if present (rare); otherwise
# warn and move on.
unless buffer.strip.empty?
  trailing = buffer.match(/(<note\b.*<\/note>)/m)
  if trailing
    process_note_xml(trailing[1].strip, nokogiri_available, insert_note_stmt, insert_comment_stmt)
  else
    warn "Warning: leftover data after EOF that does not contain a complete </note> block; ignoring."
  end
end
# Release the prepared statements before closing the database handle.
insert_note_stmt.close
insert_comment_stmt.close
db.close
puts "Done at #{Time.now.to_s}. Data saved to #{DB_FILE} (parser: #{nokogiri_available ? 'nokogiri' : 'rexml'})"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment