Source code for rios.inputcollection

This module contains the InputCollection and InputIterator
classes. These classes are for ImageReader to keep track
of the inputs it has and deal with resampling.

# This file is part of RIOS - Raster I/O Simplification
# Copyright (C) 2012  Sam Gillingham, Neil Flood
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <>.

import os
import sys
import tempfile

from . import imageio
from . import rioserrors
from . import pixelgrid
from osgeo import gdal

[docs]class InputIterator(object): """ Class to allow iteration across an InputCollection instance. Do not instantiate this class directly - it is created by InputCollection.__iter__(). See for a description of how this works. There is another way, see: but it seemed too much like Windows 3.1 programming which scared me! Returns a image name, GDAL Dataset, PixelGridDefn, nullvallist and datatype for each iteration """ def __init__(self, collection): # collection = an InputCollection instance self.collection = collection self.index = 0 # start at first one def __iter__(self): # For iteration support - just return self. return self
[docs] def next(self): # For Python 2.x return self.__next__()
def __next__(self): # for iteration support. Raises a StopIteration # if we have read beyond the end of the collection if self.index >= len(self.collection.datasetList): raise StopIteration() # pull out the values we want to return image = self.collection.imageList[self.index] ds = self.collection.datasetList[self.index] pixgrid = self.collection.pixgridList[self.index] nullvals = self.collection.nullValList[self.index] datatype = self.collection.dataTypeList[self.index] # so next time we are looking at the next one self.index += 1 return (image, ds, pixgrid, nullvals, datatype)
[docs]class InputCollection(object): """ InputCollection class. Keeps all of the inputs and the information we need about them in one place. Gets passed a list of filenames and opens them, creates a PixelGridDefn and pulls out their null values. Setting the reference dataset is done via setReference() (is first dataset by default). Resampling can be performed by resampleToReference(). Use checkAllMatch() to see if resampling is necessary. """ def __init__(self, imageList, loggingstream=sys.stdout): """ Constructor. imageList is a list of file names that need to be opened. An ImageOpenException() is raised should opening fail. Set loggingstream to a file like object if you wish logging about resampling to be sent somewhere else rather than stdout. """ self.loggingstream = loggingstream # initialise our lists self.imageList = [] self.datasetList = [] self.pixgridList = [] self.nullValList = [] # NB: a list of lists self.dataTypeList = [] # numpy types self.filestoremove = [] # any temp resampled files # go thru each image for image in imageList: # try and open it ds = gdal.Open(str(image)) if ds is None: msg = 'Unable to Open %s' % image raise rioserrors.ImageOpenError(msg) # create a PixelGridDefn for the dataset pixGrid = self.makePixGridFromDataset(ds) # Stash the null values for each band dsNullValList = [] for i in range(ds.RasterCount): nullVal = ds.GetRasterBand(i + 1).GetNoDataValue() dsNullValList.append(nullVal) # get the datatype of band 1 gdaldatatype = ds.GetRasterBand(1).DataType numpytype = imageio.GDALTypeToNumpyType(gdaldatatype) # store the values in our lists self.imageList.append(image) self.datasetList.append(ds) self.pixgridList.append(pixGrid) self.nullValList.append(dsNullValList) self.dataTypeList.append(numpytype) # by default the reference dataset is the first file self.referencePixGrid = self.pixgridList[0] def __del__(self): """ Remove all resampled files. This does not appear to get called if there is an exception in the reader, so there is an exception handler and a finally in ImageReader.readBlock which calls cleanup directly. """ self.cleanup()
[docs] def close(self): """ Closes all open datasets """ for ds in self.datasetList: del ds self.datasetList = [] self.cleanup()
[docs] def cleanup(self): """ Removes any temp files. To be called from destructor? Seems not given Neil's experience - needs to be called from finally clause - not sure how to enforce this in user's script.... """ for f in self.filestoremove: if os.path.exists(f): try: os.remove(f) except rioserrors.PermissionError: # ignore any 'file in use' errors on Windows # This is only a problem when useVRT=False as we are dealing # with an actual dataset that cannot be deleted (rather than a VRT # which can for some reason). Fixing this properly is going to be # quite hard (the info object and maybe others still hold the # dataset open) so let's ignore for now. pass self.filestoremove = []
def __len__(self): # see return len(self.datasetList) def __getitem__(self, key): # see # for indexing, returns: # imagename, dataset, pixgrid,nullValList, datatype # if key outside range, then the individual lists # should raise KeyError image = self.imageList[key] ds = self.datasetList[key] pixgrid = self.pixgridList[key] nullValList = self.nullValList[key] datatype = self.dataTypeList[key] return (image, ds, pixgrid, nullValList, datatype) def __iter__(self): # see return InputIterator(self)
[docs] def setReference(self, refpath=None, refgeotrans=None, refproj=None, refNCols=None, refNRows=None, refPixgrid=None): """ Sets the reference dataset for resampling purposes. Either set refpath to a path to a GDAL file and all necessary values will be extracted. Or pass refgeotrans, refproj, refNCols and refNRows. """ # if we have a path, open it with GDAL if refpath is not None: ds = gdal.Open(refpath) if ds is None: msg = 'Unable to Open %s' % refpath raise rioserrors.ImageOpenError(msg) # make a pixgrid from the dataset self.referencePixGrid = self.makePixGridFromDataset(ds) # close the dataset del ds # else, check we have all the aother params elif refgeotrans is not None and refproj is not None and refNCols is not None and refNRows is not None: # create a pixgrid from the info we have proj = self.specialProjFixes(refproj) pixGrid = pixelgrid.PixelGridDefn(geotransform=refgeotrans, nrows=refNRows, ncols=refNCols, projection=proj) self.referencePixGrid = pixGrid elif refPixgrid is not None: # Or maybe we have been given a complete pixel grid already refPixgrid.projection = self.specialProjFixes(refPixgrid.projection) self.referencePixGrid = refPixgrid else: msg = 'Must pass either refpath or refPixgrid or all the other params' raise rioserrors.ParameterError(msg)
[docs] def resampleToReference(self, ds, nullValList, workingRegion, resamplemethod, tempdir='.', useVRT=False, allowOverviewsGdalwarp=False): """ Resamples any inputs that do not match the reference, to the reference image. ds is the GDAL dataset that needs to be resampled. nullValList is the list of null values for that dataset. workingRegion is a PixelGridDefn resamplemethod is a string containing a method supported by gdalwarp. Returns the new (temporary) resampled dataset instance. Do not call directly, use resampleAllToReference() """ if useVRT: ext = '.vrt' else: # get the driver from the input dataset # so we know the default extension, and type # for the temporary file. driver = ds.GetDriver() drivermeta = driver.GetMetadata() # temp image file name - based on driver extension ext = '' if gdal.DMD_EXTENSION in drivermeta: ext = '.' + drivermeta[gdal.DMD_EXTENSION] (fileh, temp_image) = tempfile.mkstemp(ext, dir=tempdir) os.close(fileh) overviewLevel = 'NONE' if allowOverviewsGdalwarp: overviewLevel = 'AUTO' if useVRT: driverName = 'VRT' else: driverName = driver.ShortName nullValues = self.makeWarpNullOptions(nullValList) warpOptions = gdal.WarpOptions(overviewLevel=overviewLevel, srcSRS=self.specialProjFixes(ds.GetProjection()), dstSRS=self.referencePixGrid.projection, outputBounds=[workingRegion.xMin, workingRegion.yMin, workingRegion.xMax, workingRegion.yMax], xRes=workingRegion.xRes, yRes=workingRegion.yRes, format=driverName, resampleAlg=resamplemethod, srcNodata=nullValues, dstNodata=nullValues) # delete later - add before the system call as if Ctrl-C is hit # control does not always return self.filestoremove.append(temp_image) usingExceptions = gdal.GetUseExceptions() gdal.UseExceptions() try: newds = gdal.Warp(temp_image, ds, options=warpOptions) except Exception as e: msg = ("Error while running gdal.Warp(): {}. Try setting the " + "RIOS_NO_VRT_FOR_RESAMPLING environment variable " + "to '1'").format(str(e)) raise rioserrors.GdalWarpError(msg) finally: if not usingExceptions: gdal.DontUseExceptions() # return the new dataset return newds
[docs] def resampleAllToReference(self, footprint, resamplemethodlist, tempdir='.', useVRT=False, allowOverviewsGdalwarp=False): """ Reamples all datasets that don't match the reference to the same as the reference. footprint is imageio.INTERSECTION, imageio.UNION or imageio.BOUNDS_FROM_REFERENCE resamplemethod is a string containing a method supported by gdalwarp. """ # find the working region based on the footprint workingRegion = self.findWorkingRegion(footprint) # go thru each input. for count in range(len(self.datasetList)): pixGrid = self.pixgridList[count] # check the pixgrid of the dataset matches the # reference allEqual = True if not self.referencePixGrid.equalPixSize(pixGrid): self.loggingstream.write("Pixel sizes don't match %.20f %.20f %.20f %.20f\n" % (self.referencePixGrid.xRes, pixGrid.xRes, self.referencePixGrid.yRes, pixGrid.yRes)) allEqual = False elif not self.referencePixGrid.equalProjection(pixGrid): self.loggingstream.write("Coordinate systems don't match %s %s\n" % (self.referencePixGrid.projection, pixGrid.projection)) allEqual = False elif not self.referencePixGrid.alignedWith(pixGrid): self.loggingstream.write("Images aren't on the same grid\n") allEqual = False # did it fail to match completely? if not allEqual: # resample the dataset ds = self.datasetList[count] nullVals = self.nullValList[count] resamplemethod = resamplemethodlist[count] newds = self.resampleToReference(ds, nullVals, workingRegion, resamplemethod, tempdir, useVRT, allowOverviewsGdalwarp) # stash the new temp dataset as our input item self.datasetList[count] = newds # just assume it has been successfully # resampled to exactly the workingRegion self.pixgridList[count] = workingRegion # close the original dataset - just using temp # resampled one from now. del ds
[docs] def checkAllMatch(self): """ Returns whether any resampling necessary to match reference dataset. Use as a check if no resampling is done that we can proceed ok. """ match = True for pixGrid in self.pixgridList: if not self.referencePixGrid.equalPixSize(pixGrid): match = False break elif not self.referencePixGrid.equalProjection(pixGrid): match = False break elif not self.referencePixGrid.alignedWith(pixGrid): match = False break return match
[docs] @staticmethod def makeWarpNullOptions(nullValList): """ Make appropriate options for gdalwarp, to handle null value properly. If any of the null values in the list is None, then we can't do it because the null value is not set on the file, so the return value is None. Returns a list that can be passed to as srcNodata and dstNodata params to gdal.Warp(). """ haveNone = False for nullVal in nullValList: if nullVal is None: haveNone = True if haveNone: allNulls = None else: nullValStrList = [str(n) for n in nullValList] allNulls = ' '.join(nullValStrList) return allNulls
[docs] @staticmethod def specialProjFixes(projwkt): """ Does any special fixes required for the projection. Returns the fixed projection WKT string. Specifically this does two things, both of which are to cope with rubbish that Imagine has put into the projection. Firstly, it removes the crappy TOWGS84 parameters which Imagine uses for GDA94, and secondly removes the crappy name which Imagine gives to the correct GDA94. If neither of these things is found, returns the string unchanged. """ dodgyTOWGSstring = "TOWGS84[-16.237,3.51,9.939,1.4157e-06,2.1477e-06,1.3429e-06,1.91e-07]" properTOWGSstring = "TOWGS84[0,0,0,0,0,0,0]" if projwkt.find('"GDA94"') > 0 or projwkt.find('"Geocentric_Datum_of_Australia_1994"') > 0: newWkt = projwkt.replace(dodgyTOWGSstring, properTOWGSstring) else: newWkt = projwkt # Imagine's name for the correct GDA94 also causes problems, so # replace it with something more standard. newWkt = newWkt.replace('GDA94-ICSM', 'GDA94') return newWkt
[docs] @staticmethod def makePixGridFromDataset(ds): """ Make a pixelgrid object from the given dataset. """ proj = InputCollection.specialProjFixes(ds.GetProjection()) geotrans = ds.GetGeoTransform() (nrows, ncols) = (ds.RasterYSize, ds.RasterXSize) pixGrid = pixelgrid.PixelGridDefn(geotransform=geotrans, nrows=nrows, ncols=ncols, projection=proj) return pixGrid
[docs] def findWorkingRegion(self, footprint): """ Work out what the combined pixel grid should be, in terms of the given reference input raster. Returns a PixelGridDefn object. """ combinedGrid = pixelgrid.findCommonRegion(self.pixgridList, self.referencePixGrid, combine=footprint) return combinedGrid