wboykinm · March 21, 2019 20:38
diff --git a/robosat_vt.sh b/robosat_vt.sh
 # RoboSat walkthrough: train a building classifier on Middlebury tiles and
 # run it over the Rutland, VT area.
 #
 # Tip: to look at any intermediate file from the host, copy it out with
 #   docker cp <IMAGE_ID>:/app/<filename> .

 # Fetch the CPU image and open an interactive shell in it; the remaining
 # commands are presumably meant to be run inside that shell.
 docker pull mapbox/robosat:latest-cpu
 docker run -it mapbox/robosat:latest-cpu /bin/bash

 # Tooling: sudo first, then curl and add-apt-repository, then the NodeSource
 # and UbuntuGIS package sources, and finally the packages used further down
 # (wget, node, GDAL's ogr2ogr, editors) plus csvkit from pip.
 apt-get update \
   && apt-get install -y sudo \
   && rm -rf /var/lib/apt/lists/*
 sudo apt-get update
 sudo apt-get install -y curl software-properties-common
 curl -sL https://deb.nodesource.com/setup_10.x | bash -
 sudo add-apt-repository -y ppa:ubuntugis/ppa \
   && sudo apt-get update
 sudo apt-get install -y wget nodejs gdal-bin vim less
 pip3 install csvkit

 # Download the Vermont OpenStreetMap extract from Geofabrik.
 wget https://download.geofabrik.de/north-america/us/vermont-latest.osm.pbf

 # Extract the building features from the extract as GeoJSON.
 ./rs extract --type building vermont-latest.osm.pbf vt-building.geojson

 # Training area: clip the buildings to a bounding box around Middlebury.
 ogr2ogr -f 'GeoJSON' -t_srs 'EPSG:4326' \
   middlebury-building.geojson vt-building.geojson \
   -clipsrc -73.2134 43.9801 -73.0891 44.0650
 # Prediction area: a single rectangle polygon covering the Rutland area.
 printf '%s\n' '{ "type": "FeatureCollection", "features": [ { "type": "Feature", "properties": {}, "geometry": { "type": "Polygon", "coordinates": [ [ [ -73.4161376953125, 43.33816367935935 ], [ -72.83935546875, 43.33816367935935 ], [ -72.83935546875, 43.819665724206956 ], [ -73.4161376953125, 43.819665724206956 ], [ -73.4161376953125, 43.33816367935935 ] ] ] } } ] }' > rutland.geojson

 # List the zoom-17 tiles covering each area.
 ./rs cover --zoom 17 middlebury-building.geojson middlebury-building.csv
 ./rs cover --zoom 17 rutland.geojson rutland.csv

 # Fetch imagery for both tile lists from the free VCGI CIR tile service.
 # The {z}/{y}/{x} placeholders are passed through literally for rs to fill in.
 ./rs download \
   'https://maps.vcgi.vermont.gov/arcgis/rest/services/EGC_services/IMG_VCGI_CIR_WM_CACHE/ImageServer/tile/{z}/{y}/{x}' \
   middlebury-building.csv middlebury-images
 ./rs download \
   'https://maps.vcgi.vermont.gov/arcgis/rest/services/EGC_services/IMG_VCGI_CIR_WM_CACHE/ImageServer/tile/{z}/{y}/{x}' \
   rutland.csv rutland-images


 # Write the dataset config first; the rasterize step below reads it.
 printf '%s\n' "
 # Configuration related to a specific dataset.
 # For syntax see: https://github.com/toml-lang/toml#table-of-contents


 # Dataset specific common attributes.
 [common]

  # The slippy map dataset's base directory.
  dataset = 'vt/middlebury'

  # Human representation for classes.
  classes = ['background', 'building']

  # Color map for visualization and representing classes in masks.
  # Note: available colors can be found in 'robosat/colors.py'
  colors  = ['denim', 'orange']


 # Dataset specific class weights computes on the training data.
 # Note: use './rs weights -h' to compute these for new datasets.
 [weights]
  values = [1.451183, 21.289612]
 " > config/dataset-building-middlebury.toml

 # Rasterize the Middlebury buildings for every tile in the cover list at
 # zoom 17, 256 px per tile; output goes to the middlebury-building directory.
 ./rs rasterize \
   --dataset config/dataset-building-middlebury.toml \
   --zoom 17 --size 256 \
   middlebury-building.geojson middlebury-building.csv middlebury-building

 # Build the dataset tree that the dataset config points at
 # (dataset = 'vt/middlebury'). Start from a clean slate: -f keeps the removal
 # from erroring on a first run when vt/ does not exist yet, and mkdir -p
 # creates the intermediate directories and is safe to repeat.
 rm -rf vt
 mkdir -p \
   vt/middlebury/training/images \
   vt/middlebury/training/labels \
   vt/middlebury/validation/images \
   vt/middlebury/validation/labels \
   vt/middlebury/tmp
 # Every tile starts out as training data: downloaded imagery becomes the
 # images, rasterized masks become the labels.
 cp -r middlebury-images/* vt/middlebury/training/images/
 cp -r middlebury-building/* vt/middlebury/training/labels/

 # Compute class weights for this dataset. The [weights] values in the dataset
 # config are hard-coded, so presumably they should be updated by hand if the
 # output here differs.
 ./rs weights --dataset config/dataset-building-middlebury.toml

 # Write the model config consumed by the train and predict steps: CPU only,
 # batch size 1, 256 px tiles, checkpoints under vt/middlebury/tmp/.
 printf '%s\n' "
 # Configuration related to a specific model.
 # For syntax see: https://github.com/toml-lang/toml#table-of-contents


 # Model specific common attributes.
 [common]

  # Use CUDA for GPU acceleration.
  cuda       = false

  # Batch size for training.
  batch_size = 1

  # Image side size in pixels.
  image_size = 256

  # Directory where to save checkpoints to during training.
  checkpoint = 'vt/middlebury/tmp/'


 # Model specific optimization parameters.
 [opt]

  # Total number of epochs to train for.
  epochs     = 12

  # Learning rate for the optimizer.
  lr         = 0.0001

  # Weight decay l2 penalty for the optimizer
  decay      = 0.0001
  
  # Loss function name (e.g 'Lovasz', 'mIoU' or 'CrossEntropy')
  loss = 'Lovasz'
 " > config/model-unet-middlebury.toml

 # Peel off a validation set (about 30% of the tiles), chosen at random.
 # Multiply before dividing: with integer arithmetic, dividing first would
 # truncate early and only ever yield multiples of 3.
 tile_count=$(wc -l < middlebury-building.csv)
 validation_count=$(( (tile_count - 1) * 3 / 10 ))
 sort -R middlebury-building.csv | head -n "$validation_count" > validation.csv

 # Each row is x,y,z. Split it with read instead of spawning csvcut three
 # times per row (csvcut would also treat every single row as a header line).
 # Then move that tile's image and label from the training tree into the
 # validation tree.
 while IFS=, read -r x y z; do
   # Drop a trailing carriage return in case the CSV has CRLF line endings.
   z=${z%$'\r'}
   mkdir -p "vt/middlebury/validation/images/$z/$x/"
   mv "vt/middlebury/training/images/$z/$x/$y.webp" "vt/middlebury/validation/images/$z/$x/"
   mkdir -p "vt/middlebury/validation/labels/$z/$x/"
   mv "vt/middlebury/training/labels/$z/$x/$y.png" "vt/middlebury/validation/labels/$z/$x/"
 done < validation.csv

 # Train with the model and dataset configs written above; --workers 0 as in
 # the original workflow.
 ./rs train \
   --model config/model-unet-middlebury.toml \
   --dataset config/dataset-building-middlebury.toml \
   --workers 0

 # Predict over the Rutland tiles.
 # - The imagery for Rutland was downloaded into rutland-images; no
 #   rutland/validation/images directory is ever created by this workflow.
 # - The model config trains for 12 epochs, so the final checkpoint is expected
 #   to be checkpoint-00012-of-00012.pth.
 #   NOTE(review): confirm the file name in vt/middlebury/tmp/ after training.
 mkdir -p probs
 ./rs predict \
   --batch_size 1 \
   --checkpoint vt/middlebury/tmp/checkpoint-00012-of-00012.pth \
   --tile_size 256 \
   --model config/model-unet-middlebury.toml \
   --dataset config/dataset-building-middlebury.toml \
   rutland-images probs
	# RoboSat walkthrough: train a building classifier on Middlebury tiles and
	# run it over the Rutland, VT area.
	#
	# Tip: to look at any intermediate file from the host, copy it out with
	#   docker cp <IMAGE_ID>:/app/<filename> .

	# Fetch the CPU image and open an interactive shell in it; the remaining
	# commands are presumably meant to be run inside that shell.
	docker pull mapbox/robosat:latest-cpu
	docker run -i -t mapbox/robosat:latest-cpu /bin/bash

	# configure some things
	apt-get update && apt-get install -y sudo && rm -rf /var/lib/apt/lists/*
	sudo apt-get update
	sudo apt-get install curl software-properties-common -y
	# This must be a real pipe so the NodeSource setup script is fed to bash;
	# an escaped "\|" would hand "|", "bash" and "-" to curl as arguments.
	curl -sL https://deb.nodesource.com/setup_10.x | bash -
	sudo add-apt-repository ppa:ubuntugis/ppa -y && sudo apt-get update
	sudo apt-get install wget nodejs gdal-bin vim less -y
	pip3 install csvkit

	# Download the Vermont OpenStreetMap extract from Geofabrik.
	wget https://download.geofabrik.de/north-america/us/vermont-latest.osm.pbf

	# Extract the building features from the extract as GeoJSON.
	./rs extract --type building vermont-latest.osm.pbf vt-building.geojson

	# Training area: clip the buildings to a bounding box around Middlebury.
	ogr2ogr -f 'GeoJSON' -t_srs 'EPSG:4326' \
		middlebury-building.geojson vt-building.geojson \
		-clipsrc -73.2134 43.9801 -73.0891 44.0650
	# Prediction area: a single rectangle polygon covering the Rutland area.
	printf '%s\n' '{ "type": "FeatureCollection", "features": [ { "type": "Feature", "properties": {}, "geometry": { "type": "Polygon", "coordinates": [ [ [ -73.4161376953125, 43.33816367935935 ], [ -72.83935546875, 43.33816367935935 ], [ -72.83935546875, 43.819665724206956 ], [ -73.4161376953125, 43.819665724206956 ], [ -73.4161376953125, 43.33816367935935 ] ] ] } } ] }' > rutland.geojson

	# List the zoom-17 tiles covering each area.
	./rs cover --zoom 17 middlebury-building.geojson middlebury-building.csv
	./rs cover --zoom 17 rutland.geojson rutland.csv

	# Fetch imagery for both tile lists from the free VCGI CIR tile service.
	# The {z}/{y}/{x} placeholders are passed through literally for rs to fill in.
	./rs download \
		'https://maps.vcgi.vermont.gov/arcgis/rest/services/EGC_services/IMG_VCGI_CIR_WM_CACHE/ImageServer/tile/{z}/{y}/{x}' \
		middlebury-building.csv middlebury-images
	./rs download \
		'https://maps.vcgi.vermont.gov/arcgis/rest/services/EGC_services/IMG_VCGI_CIR_WM_CACHE/ImageServer/tile/{z}/{y}/{x}' \
		rutland.csv rutland-images


	# Write the dataset config first; the rasterize step below reads it.
	printf '%s\n' "
	# Configuration related to a specific dataset.
	# For syntax see: https://github.com/toml-lang/toml#table-of-contents


	# Dataset specific common attributes.
	[common]

	# The slippy map dataset's base directory.
	dataset = 'vt/middlebury'

	# Human representation for classes.
	classes = ['background', 'building']

	# Color map for visualization and representing classes in masks.
	# Note: available colors can be found in 'robosat/colors.py'
	colors = ['denim', 'orange']


	# Dataset specific class weights computes on the training data.
	# Note: use './rs weights -h' to compute these for new datasets.
	[weights]
	values = [1.451183, 21.289612]
	" > config/dataset-building-middlebury.toml

	# Rasterize the Middlebury buildings for every tile in the cover list at
	# zoom 17, 256 px per tile; output goes to the middlebury-building directory.
	./rs rasterize \
		--dataset config/dataset-building-middlebury.toml \
		--zoom 17 --size 256 \
		middlebury-building.geojson middlebury-building.csv middlebury-building

	# Build the dataset tree that the dataset config points at
	# (dataset = 'vt/middlebury'). Start from a clean slate: -f keeps the removal
	# from erroring on a first run when vt/ does not exist yet, and mkdir -p
	# creates the intermediate directories and is safe to repeat.
	rm -rf vt
	mkdir -p \
		vt/middlebury/training/images \
		vt/middlebury/training/labels \
		vt/middlebury/validation/images \
		vt/middlebury/validation/labels \
		vt/middlebury/tmp
	# Every tile starts out as training data: downloaded imagery becomes the
	# images, rasterized masks become the labels.
	cp -r middlebury-images/* vt/middlebury/training/images/
	cp -r middlebury-building/* vt/middlebury/training/labels/

	# Compute class weights for this dataset. The [weights] values in the dataset
	# config are hard-coded, so presumably they should be updated by hand if the
	# output here differs.
	./rs weights --dataset config/dataset-building-middlebury.toml

	# Write the model config consumed by the train and predict steps: CPU only,
	# batch size 1, 256 px tiles, checkpoints under vt/middlebury/tmp/.
	printf '%s\n' "
	# Configuration related to a specific model.
	# For syntax see: https://github.com/toml-lang/toml#table-of-contents


	# Model specific common attributes.
	[common]

	# Use CUDA for GPU acceleration.
	cuda = false

	# Batch size for training.
	batch_size = 1

	# Image side size in pixels.
	image_size = 256

	# Directory where to save checkpoints to during training.
	checkpoint = 'vt/middlebury/tmp/'


	# Model specific optimization parameters.
	[opt]

	# Total number of epochs to train for.
	epochs = 12

	# Learning rate for the optimizer.
	lr = 0.0001

	# Weight decay l2 penalty for the optimizer
	decay = 0.0001

	# Loss function name (e.g 'Lovasz', 'mIoU' or 'CrossEntropy')
	loss = 'Lovasz'
	" > config/model-unet-middlebury.toml

	# Peel off a validation set (about 30% of the tiles), chosen at random.
	# Multiply before dividing: with integer arithmetic, dividing first would
	# truncate early and only ever yield multiples of 3.
	tile_count=$(wc -l < middlebury-building.csv)
	validation_count=$(( (tile_count - 1) * 3 / 10 ))
	# A real pipe is required here; an escaped "\|" would be passed to sort as
	# a file name instead of connecting it to head.
	sort -R middlebury-building.csv | head -n "$validation_count" > validation.csv

	# Each row is x,y,z. Split it with read instead of piping every row through
	# csvcut three times (csvcut would also treat each single row as a header
	# line). Then move that tile's image and label from the training tree into
	# the validation tree.
	while IFS=, read -r x y z; do
		# Drop a trailing carriage return in case the CSV has CRLF line endings.
		z=${z%$'\r'}
		mkdir -p "vt/middlebury/validation/images/$z/$x/"
		mv "vt/middlebury/training/images/$z/$x/$y.webp" "vt/middlebury/validation/images/$z/$x/"
		mkdir -p "vt/middlebury/validation/labels/$z/$x/"
		mv "vt/middlebury/training/labels/$z/$x/$y.png" "vt/middlebury/validation/labels/$z/$x/"
	done < validation.csv

	# Train with the model and dataset configs written above; --workers 0 as in
	# the original workflow.
	./rs train \
		--model config/model-unet-middlebury.toml \
		--dataset config/dataset-building-middlebury.toml \
		--workers 0

	# Predict over the Rutland tiles.
	# - The imagery for Rutland was downloaded into rutland-images; no
	#   rutland/validation/images directory is ever created by this workflow.
	# - The model config trains for 12 epochs, so the final checkpoint is expected
	#   to be checkpoint-00012-of-00012.pth.
	#   NOTE(review): confirm the file name in vt/middlebury/tmp/ after training.
	mkdir -p probs
	./rs predict \
		--batch_size 1 \
		--checkpoint vt/middlebury/tmp/checkpoint-00012-of-00012.pth \
		--tile_size 256 \
		--model config/model-unet-middlebury.toml \
		--dataset config/dataset-building-middlebury.toml \
		rutland-images probs