Source code for eolab.georastertools.tiling

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
This module defines a rastertool named Tiling that generates tiles a raster images based on
a reference grid.
"""
import logging
from typing import List
from pathlib import Path

import rasterio
import rasterio.mask
import geopandas as gpd
import sys

from eolab.georastertools import utils
from eolab.georastertools import Rastertool, RastertoolConfigurationException
from eolab.georastertools.processing import vector
from eolab.georastertools.product import RasterProduct

_logger = logging.getLogger(__name__)


[docs]class Tiling(Rastertool): """Raster tool that tiles a raster following the geometries of a grid. The tool takes as input two files: a raster image and a grid file (shapefile, geojson ...). The grid file defines the geometries of the tiles. Different options can be configured: - output dir: select the dir where tiles will be stored to disk - output basename: naming rule of tiles. By default, the basename is the name of the original file followed by "_tile" followed by the tile index. In the formatted string, tile index is at position 1 and original filename is at position 0. For instance, a naming rule such as "tile{1}.tif" will generate the tile name "tile75.tif" for the tile id = 75. - output subdir: naming rule for subdirs when every tile must be generated in a different subdir. It is also possible to select the tiles to generate. The seleted tiles are the ones which match the expected values of a given grid's property. """
[docs] def __init__(self, geometry_file: str): """ Constructor Args: geometry_file (str): File name of the geometry file used to tile raster images """ super().__init__() self._grid = gpd.read_file(geometry_file) self._output_basename = None self._output_subdir = False
@property def grid(self): """Get the grid that defines the geometries for tiling raster images""" return self._grid @property def output_basename(self) -> str: """Base name for naming output""" return self._output_basename @property def output_subdir(self): """Naming convention of the subdir where to store outputs. None if no tiles shall not be generated in subdirs (but directly in the output dir)""" return self._output_subdir
[docs] def with_output(self, outputdir: str = ".", output_basename: str = "{}_tile{}", output_subdir: str = None): """Set up the output. Args: outputdir (str, optional, default="."): Output dir where to store results. output_basename (str, optional, default="{}_tile{}"): Basename for the output file as a formatted string. By default, the basename is the name of the original file followed by "_tile" followed by the tile index. In the formatted string, tile index is at position 1 and original filename is at position 0. For instance, tile{1}.tif will generate the following name tile75.tif for the tile id = 75 output_subdir (str, optional, default=None): When each tile must be generated in a different subdir, output_subdir defines the naming convention for the subdir. It is a formatted string with one positional parameter corresponding to the tile index. For instance, tile{} will generate the following subdir name: tile75/ for the tile id = 75. If None, output files will be generated directly in the outputdir. Returns: :obj:`eolab.georastertools.Tiling`: The current instance so that it is possible to chain the with... calls (fluent API) """ # Test if output repository is valid super().with_output(outputdir) self._output_basename = output_basename self._output_subdir = output_subdir return self
[docs] def with_id_column(self, id_column: str, ids: List[int]): """Set up a filter on the geometries for which tiles shall be generated. Args: id_column (str): Name of the column in the vector file that contains the ids to filter. ids: List of ids values for which tiles shall be generated. Returns: :obj:`eolab.georastertools.Tiling`: The current instance so that it is possible to chain the with... calls (fluent API) """ # Test if id_column is defined when ids are set if id_column is not None: if id_column not in self._grid.columns: raise RastertoolConfigurationException( f"Invalid id column named \"{id_column}\": it does not exist in the grid") self._grid = self._grid.set_index(id_column) if ids is not None: if id_column is None: raise RastertoolConfigurationException( "Ids cannot be specified when id_col is not defined") self._grid = self._grid[self._grid.index.isin(ids)] if self._grid.empty: # if no id common between grid and given ids raise RastertoolConfigurationException( f"No value in the grid column \"{id_column}\" are matching " f"the given list of ids {str(ids)}") else: invalid_ids = [i for i in ids if i not in self._grid.index] if len(invalid_ids) > 0: # log given ids which are not in the grid _logger.error(f"The grid column \"{id_column}\" does not contain " f"the following values: {str(invalid_ids)}") return self
[docs] def process_file(self, inputfile: str): """Tile the input file following the geometry file Args: inputfile (str): Input image to process Returns: [str]: The list of generates tiles. """ _logger.info(f"Processing file {inputfile}") # STEP 1: Prepare the input image so that it can be processed with RasterProduct(inputfile, vrt_outputdir=self.vrt_dir) as product: # STEP 2: Prepare grid (reproject it to raster's CRS) grid = vector.reproject(self.grid, inputfile) # STEP 3: apply tiling outputs = [] with product.open() as dataset: out_meta = dataset.meta # Crop and export every tiles for shape, i in zip(grid.geometry, grid.index): _logger.info("Crop and export tile " + str(i) + "...") try: # generate crop image image, transform = rasterio.mask.mask(dataset, [shape], crop=True, all_touched=True) # output location output = Path(self.outputdir) if self.output_subdir is not None: # if we need to export in a subdirectory output = output.joinpath(self.output_subdir.format(i)) if not output.is_dir(): output.mkdir() basename = utils.get_basename(inputfile) output = output.joinpath(self.output_basename.format(basename, i) + ".tif") # export out_meta.update({"height": image.shape[1], "width": image.shape[2], "transform": transform}) with rasterio.open(output, 'w', **out_meta) as dst: dst.write(image) outputs.append(output.as_posix()) _logger.info("Tile " + str(i) + " exported to " + str(output)) except ValueError: # if no overlap _logger.error("Input shape " + str(i) + " does not overlap raster") return outputs