Bash script to create the notes density map as seen at https://harrywood.co.uk/maps/notes/density.html, plus an initial step as a Ruby script which converts the notes planet dump XML into an SQLite file.
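The whole pipeline then boils down to running the shell script; a condensed sketch, assuming the two files below are saved as notes_density.sh (hypothetical name) and xml_to_sqlite.rb:

    # downloads the planet notes dump, builds notes.db via xml_to_sqlite.rb,
    # and renders both tile sets into tiles_closed/ and tiles_open/
    bash notes_density.sh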
#!/usr/bin/env bash
set -euo pipefail

# Bash script to create the notes density map as seen at https://harrywood.co.uk/maps/notes/density.html
#
# First we download the planet notes data and convert it to SQLite, then export back to GeoJSON files which we render with GDAL tools.
# The process is run twice: first for closed notes (green), then for open notes (red), to create two tile layers.

# CONFIG
DB="notes.db"
TABLE="notes"
OUTDIR_CLOSED="tiles_closed"
OUTDIR_OPEN="tiles_open"
TMPDIR="tmp_rasters"
mkdir -p "$TMPDIR"

echo "=== Downloading latest notes planet file ==="
wget https://planet.openstreetmap.org/notes/planet-notes-latest.osn.bz2

echo "=== Loading into SQLite with xml_to_sqlite.rb ==="
bzcat planet-notes-latest.osn.bz2 | ruby xml_to_sqlite.rb
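# Optional sanity check (an added suggestion, not in the original gist):
#   sqlite3 "$DB" 'SELECT COUNT(*) FROM notes;'
#   sqlite3 "$DB" 'SELECT COUNT(*) FROM comments;'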
# Export SQLite table to GeoJSON
echo "=== Exporting to GeoJSON ==="
ogr2ogr -f GeoJSON notes_closed.geojson "$DB" \
  -sql "SELECT id, lat, lon, MakePoint(lon, lat, 4326) AS geometry FROM $TABLE WHERE closed_at IS NOT NULL"
| echo "=== Generating chunk rasters ===" | |
| CHUNK=0 | |
| # World grid in 60°×60° chunks | |
| for LON_MIN in -180 -120 -60 0 60 120; do | |
| LON_MAX=$((LON_MIN+60)) | |
| for LAT_MIN in -90 -30 30; do | |
| LAT_MAX=$((LAT_MIN+60)) | |
| CHUNK=$((CHUNK+1)) | |
| OUTFILE="$TMPDIR/chunk_closed_${CHUNK}.tif" | |
| echo " → Chunk $CHUNK : lon $LON_MIN to $LON_MAX , lat $LAT_MIN to $LAT_MAX" | |
| gdal_grid \ | |
| -zfield id \ | |
| -a count:radius=0.004 \ | |
| -txe $LON_MIN $LON_MAX -tye $LAT_MIN $LAT_MAX \ | |
| -outsize 12000 6000 \ | |
| -l SELECT \ | |
| -of GTiff \ | |
| "notes_closed.geojson" "$OUTFILE" || echo " (no points in this region)" | |
| done | |
| done | |
| echo "=== Merging chunks ===" | |
| gdal_merge.py -o "$TMPDIR/closed_notes_density.tif" -of GTiff -co COMPRESS=DEFLATE -co TILED=YES $TMPDIR/chunk_closed_*.tif | |
| echo "=== Colorizing ===" | |
| # Color ramp for gdaldem | |
| cat > "$TMPDIR/colors.txt" <<'EOF' | |
| 0 0 0 0 0 | |
| 1 0 100 0 180 | |
| 3 0 150 0 255 | |
| 25 0 255 0 255 | |
| 80 200 255 200 255 | |
| 500 255 255 255 255 | |
| EOF | |
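# Ramp line format for gdaldem color-relief: value R G B A
# (value 0 is fully transparent, so cells with no notes stay see-through).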
gdal_edit.py -a_nodata 0 "$TMPDIR/closed_notes_density.tif"
gdaldem color-relief -alpha "$TMPDIR/closed_notes_density.tif" "$TMPDIR/colors.txt" "$TMPDIR/world_color.tif"

echo "=== Reprojecting to Web Mercator ==="
rm -f "$TMPDIR/world_merc.tif"
gdalwarp -t_srs EPSG:3857 -r bilinear -co COMPRESS=DEFLATE -co TILED=YES "$TMPDIR/world_color.tif" "$TMPDIR/world_merc.tif"
rm -f "$TMPDIR/world_merc_trimmed.tif"
gdalwarp \
  -te -20037508.34 -11188890.0 20037508.34 11188890.0 \
  -co TILED=YES -co COMPRESS=DEFLATE \
  "$TMPDIR/world_merc.tif" "$TMPDIR/world_merc_trimmed.tif"

echo "=== Generating tiles (zoom 0–9) ==="
gdal2tiles.py -z 0-9 --exclude "$TMPDIR/world_merc_trimmed.tif" "$OUTDIR_CLOSED"
echo "=== Done closed notes tiles: '$OUTDIR_CLOSED' ==="
| echo "=== Exporting open notes to GeoJSON ===" | |
| ogr2ogr -f GeoJSON notes-open.geojson "$DB" \ | |
| -sql "SELECT id, lat, lon, MakePoint(lon, lat, 4326) AS geometry FROM $TABLE WHERE closed_at IS NULL" | |
| echo "=== Generating chunk rasters ===" | |
| CHUNK=0 | |
| # World grid in 60°×60° chunks | |
| for LON_MIN in -180 -120 -60 0 60 120; do | |
| LON_MAX=$((LON_MIN+60)) | |
| for LAT_MIN in -90 -30 30; do | |
| LAT_MAX=$((LAT_MIN+60)) | |
| CHUNK=$((CHUNK+1)) | |
| OUTFILE="$TMPDIR/chunk_open_${CHUNK}.tif" | |
| echo " → Chunk $CHUNK : lon $LON_MIN to $LON_MAX , lat $LAT_MIN to $LAT_MAX" | |
| gdal_grid \ | |
| -zfield id \ | |
| -a count:radius=0.004 \ | |
| -txe $LON_MIN $LON_MAX -tye $LAT_MIN $LAT_MAX \ | |
| -outsize 12000 6000 \ | |
| -l SELECT \ | |
| -of GTiff \ | |
| "notes-open.geojson" "$OUTFILE" || echo " (no points in this region)" | |
| done | |
| done | |
| echo "=== Merging chunks ===" | |
| gdal_merge.py -o"$TMPDIR/open_notes_density.tif" -of GTiff -co COMPRESS=DEFLATE -co TILED=YES $TMPDIR/chunk_open_*.tif | |
| echo "=== Colorizing ===" | |
| # Color ramp for gdaldem | |
| cat > "$TMPDIR/colors_open.txt" <<'EOF' | |
| 0 0 0 0 0 | |
| 1 120 0 0 180 | |
| 3 170 0 0 255 | |
| 8 255 0 0 255 | |
| 15 255 200 200 255 | |
| 500 255 255 255 255 | |
| EOF | |
| gdal_edit.py -a_nodata 0 "$TMPDIR/open_notes_density.tif" | |
| gdaldem color-relief -alpha "$TMPDIR/open_notes_density.tif" "$TMPDIR/colors_open.txt" "$TMPDIR/world_color_open.tif" | |
| echo "=== Reprojecting to Web Mercator ===" | |
| rm -f "$TMPDIR/world_merc_open.tif" | |
| gdalwarp -t_srs EPSG:3857 -r bilinear -co COMPRESS=DEFLATE -co TILED=YES "$TMPDIR/world_color_open.tif" "$TMPDIR/world_merc_open.tif" | |
| rm -f "$TMPDIR/world_merc_open_trimmed.tif" | |
| gdalwarp \ | |
| -te -20037508.34 -11188890.0 20037508.34 11188890.0 \ | |
| -co TILED=YES -co COMPRESS=DEFLATE \ | |
| "$TMPDIR/world_merc_open.tif" "$TMPDIR/world_merc_open_trimmed.tif" | |
| echo "=== Generating tiles (zoom 0–9) ===" | |
| OUTDIR_OPEN="tiles_open" | |
| gdal2tiles.py -z 0-9 --exclude "$TMPDIR/world_merc_open_trimmed.tif" "tiles_open" | |
| echo "=== Done! Tiles are in 'tiles_open' ===" |
#!/usr/bin/env ruby
# frozen_string_literal: true
#
# Ruby script to copy an OpenStreetMap notes dump XML into an SQLite database.
#
# Streams XML from STDIN, extracts <note> ... </note> blocks, and writes them into a SQLite DB.
# Tries to use Nokogiri if available; falls back to REXML (stdlib) for parsing each note chunk.
#
# Usage:
#   bzcat planet-notes-latest.osn.bz2 | ruby xml_to_sqlite.rb
#
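# Dependencies (an added note, not in the original): the sqlite3 gem is required
# ("gem install sqlite3"); Nokogiri is optional but typically much faster than
# REXML ("gem install nokogiri").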
require 'sqlite3'

# Try to load Nokogiri; fall back to REXML if not available.
nokogiri_available = begin
  require 'nokogiri'
  true
rescue LoadError
  false
end
require 'rexml/document' unless nokogiri_available
DB_FILE = 'notes.db'

# --- Initialize DB ---
db = SQLite3::Database.new(DB_FILE)
db.execute_batch <<~SQL
  PRAGMA foreign_keys = ON;

  CREATE TABLE IF NOT EXISTS notes (
    id INTEGER PRIMARY KEY,
    lat REAL,
    lon REAL,
    created_at TEXT,
    closed_at TEXT
  );

  CREATE TABLE IF NOT EXISTS comments (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    note_id INTEGER,
    action TEXT,
    timestamp TEXT,
    uid INTEGER,
    user TEXT,
    text TEXT,
    FOREIGN KEY(note_id) REFERENCES notes(id)
  );
SQL
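# An added suggestion, not in the original schema: an index on comments.note_id
# speeds up per-note lookups, e.g.
#   db.execute "CREATE INDEX IF NOT EXISTS idx_comments_note_id ON comments(note_id)"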
insert_note_stmt = db.prepare(
  "INSERT OR REPLACE INTO notes (id, lat, lon, created_at, closed_at) VALUES (?, ?, ?, ?, ?)"
)
insert_comment_stmt = db.prepare(
  "INSERT INTO comments (note_id, action, timestamp, uid, user, text) VALUES (?, ?, ?, ?, ?, ?)"
)
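# Performance tip (an added suggestion, not in the original): wrapping the whole
# load in one transaction, e.g. db.execute("BEGIN") ... db.execute("COMMIT"),
# avoids a disk sync per insert and speeds up the bulk load considerably.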
# Helper: parse a single <note>...</note> XML string and insert into DB
def process_note_xml(xml_str, nokogiri_available, insert_note_stmt, insert_comment_stmt)
  if nokogiri_available
    doc = Nokogiri::XML(xml_str) { |cfg| cfg.noblanks } # remove blank nodes
    note_el = doc.at_xpath('/note')
    return unless note_el
    note_id = note_el['id']
    lat = note_el['lat']
    lon = note_el['lon']
    created_at = note_el['created_at']
    closed_at = note_el['closed_at']
    @count += 1
    puts "#{@count} Inserting note #{note_id}" if @count % 100 == 0
    insert_note_stmt.execute(note_id, lat, lon, created_at, closed_at)
    # Iterate comments
    note_el.xpath('comment').each do |c|
      action = c['action']
      timestamp = c['timestamp']
      uid = c['uid']
      user = c['user']
      text = c.text ? c.text.strip : nil
      insert_comment_stmt.execute(note_id, action, timestamp, uid, user, text)
    end
  else
    # Use REXML.
    # REXML::Document expects a complete XML document; wrap the note in a dummy root.
    wrapped = "<root>#{xml_str}</root>"
    doc = REXML::Document.new(wrapped)
    note_el = doc.root.elements['note']
    return unless note_el
    note_id = note_el.attributes['id']
    lat = note_el.attributes['lat']
    lon = note_el.attributes['lon']
    created_at = note_el.attributes['created_at']
    closed_at = note_el.attributes['closed_at']
    @count += 1
    puts "#{@count} Inserting note #{note_id}" if @count % 100 == 0
    insert_note_stmt.execute(note_id, lat, lon, created_at, closed_at)
    note_el.elements.each('comment') do |c|
      action = c.attributes['action']
      timestamp = c.attributes['timestamp']
      uid = c.attributes['uid']
      user = c.attributes['user']
      # REXML::Element#text returns only the first text node; join them all to preserve newlines
      text = c.texts.map(&:value).join.strip
      insert_comment_stmt.execute(note_id, action, timestamp, uid, user, text)
    end
  end
rescue => e
  warn "Warning: failed to parse/insert a note block: #{e.class}: #{e.message}"
end
@count = 0

# --- Stream from STDIN, split by closing </note> tags ---
buffer = ''.dup
note_close_tag = '</note>'

# Support an optional XML declaration or whitespace before notes, and multiple notes per input.
# Read in binary to preserve any encodings; the XML parser handles encoding declarations.
STDIN.binmode
while (chunk = STDIN.read(16 * 1024))
  buffer << chunk
  # Process all complete </note> occurrences in the buffer
  loop do
    idx = buffer.index(note_close_tag)
    break unless idx
    end_idx = idx + note_close_tag.length
    note_chunk = buffer.slice!(0...end_idx)
    # Trim leading whitespace/newlines so the chunk is a single <note>...</note>
    note_chunk.strip!
    next if note_chunk.empty?
    # Some inputs may contain multiple top-level nodes or stray characters; ensure we start with <note
    start_idx = note_chunk.index('<note')
    if start_idx && start_idx > 0
      # Discard any leading garbage before <note
      note_chunk = note_chunk[start_idx..-1]
    end
    # Process this note chunk
    process_note_xml(note_chunk, nokogiri_available, insert_note_stmt, insert_comment_stmt)
  end
end
# After EOF, there might be remaining buffered data that doesn't end with </note>.
# Try to see if it contains a final complete note (rare), otherwise ignore.
unless buffer.strip.empty?
  # Attempt to extract a trailing <note>...</note> if present
  if (m = buffer.match(%r{(<note\b.*</note>)}m))
    note_chunk = m[1].strip
    process_note_xml(note_chunk, nokogiri_available, insert_note_stmt, insert_comment_stmt)
  else
    warn "Warning: leftover data after EOF that does not contain a complete </note> block; ignoring."
  end
end

insert_note_stmt.close
insert_comment_stmt.close
db.close

puts "Done at #{Time.now}. Data saved to #{DB_FILE} (parser: #{nokogiri_available ? 'nokogiri' : 'rexml'})"