Last active
January 21, 2022 04:22
-
-
Save sixy6e/bbb2746dff43cd1d72e9d54e18a38628 to your computer and use it in GitHub Desktop.
Toy script to scrape a specific collection of imagery
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This toy script was put together to assist in trawling some imagery and building a small catalog. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A basic script to trawl an imagery archive and build a custom catalog.
""" | |
from pathlib import Path | |
import click | |
import rasterio # type: ignore | |
import pandas # type: ignore | |
import geopandas # type: ignore | |
from shapely.geometry import Polygon # type: ignore | |
import structlog | |
_LOG = structlog.get_logger() | |
def image_info(pathname) -> geopandas.GeoDataFrame:
    """
    Open an image and build a single-row catalog entry describing it.

    The footprint is not taken from the affine-derived extents because
    this data is not north-up (it has x and y rotations). Instead the four
    outer corners of the pixel grid are mapped to CRS coordinates and
    joined into a polygon.

    :param pathname:
        Path to the image. Either a ``str`` or a ``pathlib.Path``.

    :return:
        A GeoDataFrame with one row and the columns ``pathname``,
        ``vessel_side`` and ``geometry``, carrying the CRS of the image.
    """
    # Accept plain strings as well; ``.stem`` below needs a Path.
    pathname = Path(pathname)

    with rasterio.open(pathname) as src:
        n_rows, n_cols = src.shape

        # Corners in UL, UR, LR, LL order.  ``xy`` returns pixel *centres*
        # by default, which would shift the footprint by half a pixel.
        # Requesting the upper-left corner of the pixel at each (row, col)
        # index, including the one-past-the-end indices, yields the true
        # outer edge of the image.
        corners = [
            src.xy(0, 0, offset="ul"),
            src.xy(0, n_cols, offset="ul"),
            src.xy(n_rows, n_cols, offset="ul"),
            src.xy(n_rows, 0, offset="ul"),
        ]
        crs = src.crs

    data = {
        "pathname": [str(pathname)],
        # Last character of the stem once surrounding underscores are
        # removed; per the original author's note the stem ends in P or S.
        "vessel_side": [pathname.stem.strip("_")[-1]],
        "geometry": [Polygon(corners)],
    }

    return geopandas.GeoDataFrame(data, crs=crs)
# NOTE: click uses the function docstring as the ``--help`` text, so the
# docstring is left untouched and the details are documented here instead.
#
# rootdir: directory searched recursively for files matching ``pattern``.
# outdir:  existing directory that receives ``data-catalog.gpkg``.
# pattern: glob pattern handed to ``Path.rglob``.
#
# Raises ``click.ClickException`` when no file matches, instead of letting
# ``pandas.concat`` fail with an opaque "No objects to concatenate" error.
@click.command()
@click.option(
    "--rootdir",
    type=click.Path(exists=True, readable=True),
    required=True,  # Path(None) would otherwise fail with a TypeError
    help="The root input directory",
)
@click.option(
    "--outdir",
    type=click.Path(exists=True, writable=True),
    required=True,  # Path(None) would otherwise fail with a TypeError
    help="The base output directory to contain the output file",
)
@click.option("--pattern", default="*.tif", help="The filename pattern to search for.")
def main(rootdir: str, outdir: str, pattern: str = "*.tif") -> None:
    """Find the imagery, and build a mini catalog as a vector file."""
    catalog = []

    for file in Path(rootdir).rglob(pattern):
        # we'll store the full pathname, not relative, nor basename
        pname = file.absolute()

        _LOG.info("processing datafile", pathname=str(pname))
        catalog.append(image_info(pname))

    if not catalog:
        # pandas.concat raises ValueError on an empty list; report the real
        # cause to the user and exit non-zero via click.
        raise click.ClickException(
            f"no files matching {pattern!r} were found under {rootdir}"
        )

    _LOG.info("concatenating individual dataframes")
    # Every per-image frame has index 0; renumber so each row is unique.
    gdf = pandas.concat(catalog, ignore_index=True)

    out_pathname = Path(outdir).joinpath("data-catalog.gpkg")

    _LOG.info("writing catalog as a GeoPackage", out_pathname=str(out_pathname))
    gdf.to_file(out_pathname, driver="GPKG")

    _LOG.info("finished")
if __name__ == "__main__":
    # Run the click command only when executed as a script, not on import.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment