Source code for rios.calcstats

"""
This module creates pyramid layers and calculates statistics for image
files. Much of it was originally for ERDAS Imagine files but should work
with any other format that supports pyramid layers and statistics

"""
# This file is part of RIOS - Raster I/O Simplification
# Copyright (C) 2012  Sam Gillingham, Neil Flood
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import os

import numpy
from osgeo import gdal

from . import cuiprogress
from .rioserrors import ProcessCancelledError, SinglePassActionsError


# When calculating overviews (i.e. pyramid layers), default behaviour
# is controlled by these
dfltOverviewLvls = os.getenv('RIOS_DFLT_OVERVIEWLEVELS')
if dfltOverviewLvls is None:
    DEFAULT_OVERVIEWLEVELS = [4, 8, 16, 32, 64, 128, 256, 512]
else:
    DEFAULT_OVERVIEWLEVELS = [int(i) for i in dfltOverviewLvls.split(',')]
DEFAULT_MINOVERVIEWDIM = int(os.getenv('RIOS_DFLT_MINOVERLEVELDIM', default=128))
DEFAULT_OVERVIEWAGGREGRATIONTYPE = os.getenv('RIOS_DFLT_OVERVIEWAGGTYPE', 
    default="NEAREST")


[docs]def progressFunc(value, string, userdata): """ Progress callback for BuildOverviews """ percent = (userdata.curroffset + (value / userdata.nbands) * 100) userdata.progress.setProgress(percent) if value == 1.0: userdata.curroffset = userdata.curroffset + 100.0 / userdata.nbands return not userdata.progress.wasCancelled()
# make userdata object with progress and num bands
[docs]class ProgressUserData(object): pass
[docs]def addPyramid(ds, progress, minoverviewdim=DEFAULT_MINOVERVIEWDIM, levels=DEFAULT_OVERVIEWLEVELS, aggregationType=None): """ Adds Pyramid layers to the dataset. Adds levels until the raster dimension of the overview layer is < minoverviewdim, up to a maximum level controlled by the levels parameter. Assumes that any desired null value has already been set on each band of the Dataset. Uses gdal.Dataset.BuildOverviews() to do the work. """ progress.setLabelText("Computing Pyramid Layers...") progress.setProgress(0) # ensure everything is written to disc first ds.FlushCache() # first we work out how many overviews to build based on the size mindim = min(ds.RasterXSize, ds.RasterYSize) nOverviews = 0 for i in levels: if (mindim // i) > minoverviewdim: nOverviews = nOverviews + 1 # Need to find out if we are thematic or continuous. if aggregationType is None: aggregationType = DEFAULT_OVERVIEWAGGREGRATIONTYPE userdata = ProgressUserData() userdata.progress = progress userdata.nbands = ds.RasterCount userdata.curroffset = 0 ds.BuildOverviews(aggregationType, levels[:nOverviews], progressFunc, userdata) if progress.wasCancelled(): raise ProcessCancelledError() # make sure it goes to 100% progress.setProgress(100)
[docs]def findOrCreateColumn(ratObj, usage, name, dtype): """ Returns the index of an existing column matched on usage. Creates it if not already existing using the supplied name and dtype Returns a tuple with index and a boolean specifying if it is a new column or not """ ncols = ratObj.GetColumnCount() for col in range(ncols): if ratObj.GetUsageOfCol(col) == usage: return col, False # got here so can't exist ratObj.CreateColumn(name, dtype, usage) # new one will be last col return ncols, True
gdalLargeIntTypes = set([gdal.GDT_Int16, gdal.GDT_UInt16, gdal.GDT_Int32, gdal.GDT_UInt32]) # hack for GDAL 3.5 and later which suppport 64 bit ints if hasattr(gdal, 'GDT_Int64'): gdalLargeIntTypes.add(gdal.GDT_Int64) gdalLargeIntTypes.add(gdal.GDT_UInt64) gdalFloatTypes = set([gdal.GDT_Float32, gdal.GDT_Float64]) numpyUnsignedIntTypes = (numpy.uint8, numpy.uint16, numpy.uint32, numpy.uint64) numpySignedIntTypes = (numpy.int8, numpy.int16, numpy.int32, numpy.int64)
[docs]def addStatistics(ds, progress, ignore=None, approx_ok=False): """ Calculates statistics and adds them to the image. As of version 2.0.5, this function is no longer used directly with RIOS, and is maintained purely for backward compatibility with programs which call it directly. Uses gdal.Band.ComputeStatistics() for mean, stddev, min and max, and gdal.Band.GetHistogram() to do histogram calculation. The median and mode are estimated using the histogram, and so for larger datatypes, they will be approximate only. For thematic layers, the histogram is calculated with as many bins as required, for athematic integer and float types, a maximum of 256 bins is used. Note that this routine will use the given ignore value to set the no-data value (i.e. null value) on the dataset, using the same value for every band. Obsolete from version 2.0.5. See addBasicStatsGDAL() and addHistogramsGDAL() for replacements. """ # Set the null value, as that is what this routine used to do, historically. for i in range(ds.RasterCount): band = ds.GetRasterBand(i + 1) band.SetNoDataValue(ignore) # Add basic statistics and histogram, using GDAL. minMaxList = addBasicStatsGDAL(ds, approx_ok) addHistogramsGDAL(ds, minMaxList, approx_ok)
[docs]def addBasicStatsGDAL(ds, approx_ok): """ Add basic statistics (min, max, mean, stddev) to all bands of the given Dataset, using GDAL's function. If approx_ok is True, then much faster approximate statistics will be calculated (in particular, the min and max will only be approximate). Assumes that any desired null value has already been set on each band of the Dataset. Return a list of the minimum and maximum values for each band, in case this is required later for the histogram. """ minMaxList = [] for bandndx in range(ds.RasterCount): band = ds.GetRasterBand(bandndx + 1) (minval, maxval, meanval, stddev) = computeStatsGDAL(band, approx_ok) if None not in (minval, maxval, meanval, stddev): writeBasicStats(band, minval, maxval, meanval, stddev, approx_ok) minMaxList.append((minval, maxval)) return minMaxList
[docs]def addHistogramsGDAL(ds, minMaxList, approx_ok): """ Add histograms to all bands of the given Dataset, using GDAL's own function. If approx_ok is True, then much faster approximate histograms will be calculated (i.e. the pixel counts will be in proportion, but not exactly accurate). Assumes that any desired null value has already been set on each band of the Dataset. The minMaxList is as returned by addBasicStatsGDAL. """ for bandndx in range(ds.RasterCount): band = ds.GetRasterBand(bandndx + 1) (minval, maxval) = minMaxList[bandndx] if minval is not None: histParams = HistogramParams(band, minval, maxval) # Get histogram and force GDAL to recalculate it. Note that we # use include_out_of_range=True, which is safe because we have # calculated the histParams.calcMin/calcMax from the data. includeOutOfRange = True hist = band.GetHistogram(histParams.calcMin, histParams.calcMax, histParams.nbins, includeOutOfRange, approx_ok) # comes back as a list for some reason hist = numpy.array(hist) # Check if GDAL's histogram code overflowed. This is not a # fool-proof test, as some overflows will not result in negative # counts. Since GDAL 3.x, it is no longer required, as counts # are int64. histogramOverflow = (hist.min() < 0) if not histogramOverflow: writeHistogram(ds, band, hist, histParams)
[docs]def computeStatsGDAL(band, approx_ok): """ Compute basic statistics of a single band, using GDAL's function. Returns the values as a tuple (does NOT write anything into the file). If there are no non-null pixels, then all stats are returned as None. Returns (minval, maxval, mean, stddev) """ # get GDAL to calculate statistics - force recalculation. Trap errors usingExceptions = gdal.GetUseExceptions() gdal.UseExceptions() try: (minval, maxval, meanval, stddev) = band.ComputeStatistics(approx_ok) except RuntimeError as e: if str(e).endswith('Failed to compute statistics, no valid pixels found in sampling.'): minval = maxval = meanval = stddev = None else: raise e finally: if not usingExceptions: gdal.DontUseExceptions() return (minval, maxval, meanval, stddev)
[docs]def writeBasicStats(band, minval, maxval, meanval, stddev, approx_ok): """ Write the given basic statistics into the given band. It is assumed that by this point, we have set the null value on the band (this is normally done when the file is opened). """ band.SetStatistics(float(minval), float(maxval), float(meanval), float(stddev)) nullval = band.GetNoDataValue() if nullval is not None: # Not sure, but I think this is only used by QGIS band.SetMetadataItem("STATISTICS_EXCLUDEDVALUES", repr(nullval)) # I think that mainly the HFA format makes use of these (not sure). if approx_ok: band.SetMetadataItem("STATISTICS_APPROXIMATE", "YES") else: band.SetMetadataItem("STATISTICS_SKIPFACTORX", "1") band.SetMetadataItem("STATISTICS_SKIPFACTORY", "1")
[docs]class HistogramParams: """ Work out the various parameters needed by GDAL to compute a histogram. The inferences are based on the pixel datatype. Some (but not all) of the parameters are also used when doing a single-pass histogram. """ def __init__(self, band, minval, maxval): self.min = None self.max = None # Step is used when calculating median & mode, and for reducing # a 'direct' histogram to a 'linear' one (single-pass case). self.step = None # calcMin/calcMax are used when GetHistogram calculates the bin edges self.calcMin = None self.calcMax = None self.nbins = None self.binFunction = None # The maximum number of bins for 'linear' binFunction cases self.maxLinearBins = 256 layerType = band.GetMetadataItem('LAYER_TYPE') self.thematic = (layerType == "thematic") # Note that we explicitly set step in each datatype case. # In principle, this can be calculated, as it is done in the # float and large-int cases, but for some of the others we need # it to be exactly equal to 1, so we set it explicitly here, to # avoid rounding error problems. if self.thematic or (band.DataType == gdal.GDT_Byte): # We want a bin for every individual value, and the bin width is 1. # We also fix the min value to zero, instead of minval. This is # not ideal, but is because of interactions with the use of the # RAT WriteArray and ReadAsArray functions to handle the histogram # as a RAT column. self.min = 0 self.max = int(numpy.ceil(maxval)) self.step = 1.0 self.calcMin = self.min - 0.5 self.calcMax = self.max + 0.5 self.nbins = (self.max - self.min + 1) self.binFunction = 'direct' # If we have a negative minval, then we are screwed. if minval < 0: msg = ("Histogram code does not cope with negative values " + "in thematic raster. Please complain to the authors.") raise ValueError(msg) elif band.DataType in gdalLargeIntTypes: histrange = int(numpy.ceil(maxval) - numpy.floor(minval)) + 1 (self.min, self.max) = (int(minval), int(maxval)) if histrange <= self.maxLinearBins: self.nbins = histrange self.step = 1.0 self.binFunction = 'direct' self.calcMin = self.min - 0.5 self.calcMax = self.max + 0.5 else: self.nbins = self.maxLinearBins self.binFunction = 'linear' self.calcMin = int(self.min) self.calcMax = int(self.max) self.step = float(self.calcMax - self.calcMin) / self.nbins elif band.DataType in gdalFloatTypes: self.nbins = self.maxLinearBins (self.min, self.max) = (float(minval), float(maxval)) self.binFunction = 'linear' self.calcMin = self.min self.calcMax = self.max if self.calcMin == self.calcMax: self.calcMax = self.calcMax + 0.5 self.nbins = 1 self.step = float(self.calcMax - self.calcMin) / self.nbins
[docs]def calcStats(ds, progress=None, ignore=None, minoverviewdim=DEFAULT_MINOVERVIEWDIM, levels=DEFAULT_OVERVIEWLEVELS, aggregationType=None, approx_ok=False): """ This function is no longer used internally, and is maintained purely for backward compatibility for programs which called it directly. Calls the addPyramid and addStatistics functions, to add pyramid layers (i.e. overviews), and basic statistics and histogram, to the given open Dataset ``ds``. See the docstrings for those functions for details. """ if progress is None: progress = cuiprogress.SilentProgress() if ignore is not None: setNullValue(ds, ignore) addPyramid(ds, progress, minoverviewdim=minoverviewdim, levels=levels, aggregationType=aggregationType) addStatistics(ds, progress, ignore, approx_ok=approx_ok)
[docs]def setNullValue(ds, nullValue): """ Set the given null value on all bands of the given Dataset """ for i in range(ds.RasterCount): band = ds.GetRasterBand(i + 1) band.SetNoDataValue(nullValue)
[docs]class SinglePassManager: """ The required info for dealing with single-pass pyramids/statistics/histogram. There is some complexity here, because the decisions about what to do are a result of a number of different factors. We attempt to make these decisions as early as possible, and store the decisions on this object, so they can just be checked later. The general intention is that wherever possible, the pyramids, basic statistics, and histogram, will all be done with the single-pass methods. When this is not possible, or has been explicitly disabled, then it will fall back to using GDAL's methods, after the whole raster has been written. """ def __init__(self, outfiles, controls, workinggrid, tmpfileMgr): """ Check whether single-pass is appropriate and/or supported for all output files. """ self.PYRAMIDS = 0 self.STATISTICS = 1 self.HISTOGRAM = 2 self.histSupportedDtypes = numpySignedIntTypes + numpyUnsignedIntTypes self.supportedAggtypes = ("NEAREST", ) self.omit = {} self.singlePassRequested = {} self.approxOK = {} self.thematic = {} self.overviewLevels = {} self.oviewAggtype = {} self.arrDtype = {} self.accumulators = {} self.directPyramidsSupported = {} (nrows, ncols) = workinggrid.getDimensions() mindim = min(nrows, ncols) driverSupportsPyramids = self.checkDriverPyramidSupport(outfiles, controls, tmpfileMgr) for (symbolicName, seqNum, filename) in outfiles: # Store all the relevant settings from the controls object, # in a form which is a bit easier to query. # (These are all the same for all seqNum values, and unnecessarily # reset each time for the same symbolicName. Sorry.) self.omit[symbolicName, self.PYRAMIDS] = ( controls.getOptionForImagename('omitPyramids', symbolicName)) self.singlePassRequested[symbolicName, self.PYRAMIDS] = ( controls.getOptionForImagename('singlePassPyramids', symbolicName)) self.omit[symbolicName, self.STATISTICS] = ( controls.getOptionForImagename('omitBasicStats', symbolicName)) self.singlePassRequested[symbolicName, self.STATISTICS] = ( controls.getOptionForImagename('singlePassBasicStats', symbolicName)) self.omit[symbolicName, self.HISTOGRAM] = ( controls.getOptionForImagename('omitHistogram', symbolicName)) self.singlePassRequested[symbolicName, self.HISTOGRAM] = ( controls.getOptionForImagename('singlePassHistogram', symbolicName)) driverName = controls.getOptionForImagename('drivername', symbolicName) self.directPyramidsSupported[symbolicName] = ( driverSupportsPyramids[driverName]) self.approxOK[symbolicName] = controls.getOptionForImagename( 'approxStats', symbolicName) self.thematic[symbolicName] = controls.getOptionForImagename( 'thematic', symbolicName) oviewLvls = controls.getOptionForImagename('overviewLevels', symbolicName) aggType = controls.getOptionForImagename( 'overviewAggType', symbolicName) if aggType is None: aggType = "NEAREST" self.oviewAggtype[symbolicName] = aggType minOverviewDim = controls.getOptionForImagename( 'overviewMinDim', symbolicName) nOverviews = 0 for lvl in oviewLvls: if (mindim // lvl) > minOverviewDim: nOverviews += 1 self.overviewLevels[symbolicName] = oviewLvls[:nOverviews]
[docs] def checkDriverPyramidSupport(self, outfiles, controls, tmpfileMgr): """ For all the format drivers being used for output, check whether they support direct writing of pyramid layers. Return a dictionary keyed by driver name, with boolean values. """ driverSupportsPyramids = {} nrows = ncols = 64 fillVal = 20 for (symbolicName, seqNum, filename) in outfiles: driverName = controls.getOptionForImagename( 'drivername', symbolicName) if driverName not in driverSupportsPyramids: drvr = gdal.GetDriverByName(driverName) suffix = ".{}".format(drvr.GetMetadataItem('DMD_EXTENSION')) # Create a small test image with a single overview level, # written directly. imgfile = tmpfileMgr.mktempfile(prefix='pyrcheck_', suffix=suffix) arr = numpy.full((nrows, ncols), fillVal, dtype=numpy.uint8) ds = drvr.Create(imgfile, ncols, nrows, 1, gdal.GDT_Byte) band = ds.GetRasterBand(1) ds.BuildOverviews(overviewlist=[2]) band.WriteArray(arr) band_ov = band.GetOverview(0) arr_ov = arr[::2, ::2] band_ov.WriteArray(arr_ov) del band_ov, band, ds # Now read back the overview array ds = gdal.Open(imgfile) band = ds.GetRasterBand(1) band_ov = band.GetOverview(0) arr_sub2 = band_ov.ReadAsArray() del band_ov, band, ds drvr.Delete(imgfile) # If the overview array is full of the fill value, then it works supported = (arr_sub2 == fillVal).all() driverSupportsPyramids[driverName] = supported return driverSupportsPyramids
[docs] def initFor(self, ds, symbolicName, seqNum, arr): """ Initialise for the given output file """ includeStats = self.doSinglePassStatistics(symbolicName) self.arrDtype[symbolicName] = arr.dtype includeHist = self.doSinglePassHistogram(symbolicName) if includeStats or includeHist: nullval = ds.GetRasterBand(1).GetNoDataValue() thematic = self.thematic[symbolicName] key = (symbolicName, seqNum) numBands = arr.shape[0] self.accumulators[key] = [ SinglePassAccumulator(includeStats, includeHist, arr.dtype, nullval, thematic) for i in range(numBands) ] if self.doSinglePassPyramids(symbolicName): aggType = self.oviewAggtype[symbolicName] ds.BuildOverviews(aggType, self.overviewLevels[symbolicName])
[docs] def doSinglePassPyramids(self, symbolicName): """ Return True if we should do single-pass pyramids layers, False otherwise. Decision depends on choices for omitPyramids, singlePassPyramids, and overviewAggType. """ key = (symbolicName, self.PYRAMIDS) omit = self.omit[key] supported = self.directPyramidsSupported[symbolicName] spReq = self.singlePassRequested[key] aggType = self.oviewAggtype[symbolicName] if spReq is True and aggType not in self.supportedAggtypes: msg = ("Single-pass pyramids explicitly requested, but " + "not supported for aggregationType '{}'").format( aggType) raise SinglePassActionsError(msg) spPyr = ((spReq is True or spReq is None) and (not omit) and supported and (aggType in self.supportedAggtypes)) return spPyr
[docs] def doSinglePassStatistics(self, symbolicName): """ Return True if we should do single-pass basic statistics, False otherwise. """ key = (symbolicName, self.STATISTICS) omit = self.omit[key] spReq = self.singlePassRequested[key] approxOK = self.approxOK[symbolicName] spStats = ((spReq is True or spReq is None) and not (omit or approxOK)) return spStats
[docs] def doSinglePassHistogram(self, symbolicName): """ Return True if we should do single-pass histogram, False otherwise, based on what has been requested, the datatype of the raster. """ key = (symbolicName, self.HISTOGRAM) omit = self.omit[key] spReq = self.singlePassRequested[key] approxOK = self.approxOK[symbolicName] if symbolicName not in self.arrDtype: msg = ("doSinglePassHistogram({name}) has been called " + "before initFor({name}, ...)").format(name=symbolicName) raise SinglePassActionsError(msg) dtype = self.arrDtype[symbolicName] dtypeSupported = (dtype in self.histSupportedDtypes) # Here we distinguish between spReq being True or None. If it # is None, then we will settle on some suitable default behaviour, # depending on other conditions, but if it is explicitly True, # then we must have the required conditions, or raise an # exception to explain why it will not be done. if spReq is True and not dtypeSupported: msg = ("Explicitly requested single-pass histogram, but " + "this is not supported for datatype {}".format(dtype)) raise SinglePassActionsError(msg) spHist = ((spReq is True or spReq is None) and dtypeSupported and not (omit or approxOK)) return spHist
[docs]class SinglePassAccumulator: """ Accumulator for statistics and histogram for a single band. Used when doing single-pass stats and/or histogram. """ def __init__(self, includeStats, includeHist, dtype, nullval, thematic): self.nullval = nullval self.includeStats = includeStats self.includeHist = includeHist self.thematic = thematic if includeStats: self.minval = None self.maxval = None self.sum = 0 self.ssq = 0 self.count = 0 if includeHist: # Match the 'thematic' behaviour in HistogramParams if thematic or (dtype == numpy.uint8): self.binFunc = "direct" self.histMinZero = True else: self.binFunc = "linear" self.histMinZero = False # Separate count arrays for (values >= 0) and (values < 0) self.hist_pos = None self.hist_neg = None
[docs] def doStatsAccum(self, arr): """ Accumulate basic stats for the given array """ if self.nullval is None: values = arr.flatten() else: values = arr[arr != self.nullval] if len(values) > 0: self.sum += values.astype(numpy.float64).sum() self.ssq += (values.astype(numpy.float64)**2).sum() self.count += values.size minval = values.min() if self.minval is None or minval < self.minval: self.minval = minval maxval = values.max() if self.maxval is None or maxval > self.maxval: self.maxval = maxval
[docs] def finalStats(self): """ Return the final values of the four basic statistics (minval, maxval, mean, stddev) """ meanval = None stddev = None if self.count > 0: meanval = self.sum / self.count variance = self.ssq / self.count - meanval ** 2 stddev = 0.0 # In case some rounding error made variance negative if variance >= 0: stddev = numpy.sqrt(variance) return (self.minval, self.maxval, meanval, stddev)
[docs] def doHistAccum(self, arr): """ Accumulate the histogram with counts from the given arr. For signed int types, maintain two separate count arrays, one for positive values and one for negatives. This is due to using numpy.bincount() to do the counting. """ if (arr.dtype in numpyUnsignedIntTypes): if arr.dtype == numpy.uint64: # bincount() wants to do a 'safe' cast into int64 (don't know # why). This fails for uint64, so do an 'unsafe' cast first. arr = arr.astype(numpy.int64) counts = numpy.bincount(arr.flatten()) if self.nullval is not None: counts = self.removeNullFromCounts(counts, self.nullval) self.updateHist(counts, positive=True) else: # Counts for (arr >= 0) counts = numpy.bincount(arr[arr >= 0]) if self.nullval is not None and self.nullval >= 0: counts = self.removeNullFromCounts(counts, self.nullval) self.updateHist(counts, positive=True) # Counts for (arr < 0) counts = numpy.bincount(-arr[arr < 0]) # bincount() always includes zero, but we already count that in # the positives, so trim it off counts = counts[1:] if self.nullval is not None and self.nullval < 0: counts = self.removeNullFromCounts(counts, -self.nullval) self.updateHist(counts, positive=False)
[docs] @staticmethod def addTwoHistograms(hist1, hist2): """ Add the two given histograms together, and return the result. If one is longer than the other, the shorter one is added to it. """ if hist1 is None: result = hist2 else: l1 = len(hist1) l2 = len(hist2) if l1 > l2: hist1[:l2] += hist2 result = hist1 else: hist2[:l1] += hist1 result = hist2 return result
[docs] @staticmethod def removeNullFromCounts(counts, nullval): """ The counts will include a count for the null value. Set this to zero, and if it is at the end of the count array, truncate this back to the next biggest non-zero count. """ numCounts = len(counts) if nullval < (numCounts - 1): counts[int(nullval)] = 0 elif nullval == (numCounts - 1): # The null count is at the end, so find the next non-zero count, # and trim back to there. We don't need to trim from the start, # because of how numpy.bincount works. nonzeroNdx = numpy.where(counts[:-1] > 0)[0] if len(nonzeroNdx) > 0: last = nonzeroNdx[-1] counts = counts[:last + 1] else: counts = numpy.array([], dtype=counts.dtype) return counts
[docs] def updateHist(self, newCounts, positive): """ Update the current histogram counts. If positive is True, then the counts for positive values are updated, otherwise those for the negative values are updated. """ if len(newCounts) > 0: if positive: self.hist_pos = self.addTwoHistograms(self.hist_pos, newCounts) else: self.hist_neg = self.addTwoHistograms(self.hist_neg, newCounts)
[docs] def fullHist(self): """ Return the full histogram, as (minval, maxval, counts) """ minval = maxval = counts = None havePos = (self.hist_pos is not None) haveNeg = (self.hist_neg is not None) if ((havePos and not haveNeg) or (not havePos and haveNeg)): if havePos: counts = self.hist_pos else: counts = self.hist_neg nonzeroNdx = numpy.where(counts > 0)[0] if len(nonzeroNdx) > 0: minval = nonzeroNdx[0] maxval = nonzeroNdx[-1] counts = counts[minval:maxval + 1] if haveNeg: # Need to reverse saveMinval = minval minval = -maxval maxval = -saveMinval counts = counts[::-1] elif (havePos and haveNeg): nonzeroNdx = numpy.where(self.hist_neg > 0)[0] minval = -(nonzeroNdx[-1] + 1) nonzeroNdx = numpy.where(self.hist_pos > 0)[0] maxval = nonzeroNdx[-1] counts = numpy.concatenate([self.hist_neg[::-1], self.hist_pos]) if minval is not None and minval > 0 and self.histMinZero: newCounts = numpy.zeros(int(maxval) + 1, dtype=numpy.int64) newCounts[minval:] = counts counts = newCounts minval = 0 return (minval, maxval, counts)
[docs]def handleSinglePassActions(ds, arr, singlePassMgr, symbolicName, seqNum, xOff, yOff, timings): """ Called from writeBlock, to handle any single-pass actions which may be required. """ numBands = arr.shape[0] if singlePassMgr.doSinglePassPyramids(symbolicName): with timings.interval('pyramids'): writeBlockPyramids(ds, arr, singlePassMgr, symbolicName, xOff, yOff) if singlePassMgr.doSinglePassStatistics(symbolicName): with timings.interval('basicstats'): accumList = singlePassMgr.accumulators[symbolicName, seqNum] for i in range(numBands): accumList[i].doStatsAccum(arr[i]) if singlePassMgr.doSinglePassHistogram(symbolicName): with timings.interval('histogram'): accumList = singlePassMgr.accumulators[symbolicName, seqNum] for i in range(numBands): accum = accumList[i] accum.doHistAccum(arr[i])
[docs]def writeBlockPyramids(ds, arr, singlePassMgr, symbolicName, xOff, yOff): """ Calculate and write out the pyramid layers for all bands of the block given as arr. Called when doing single-pass pyramid layers. """ overviewLevels = singlePassMgr.overviewLevels[symbolicName] nOverviews = len(overviewLevels) numBands = arr.shape[0] for i in range(numBands): band = ds.GetRasterBand(i + 1) for j in range(nOverviews): band_ov = band.GetOverview(j) lvl = overviewLevels[j] # Offset from top-left edge o = lvl // 2 # Sub-sample by taking every lvl-th pixel in each direction arr_sub = arr[i, o::lvl, o::lvl] # The xOff/yOff of the block within the sub-sampled raster xOff_sub = xOff // lvl yOff_sub = yOff // lvl # The actual number of rows and cols to write, ensuring we # do not go off the edges nc = band_ov.XSize - xOff_sub nr = band_ov.YSize - yOff_sub arr_sub = arr_sub[:nr, :nc] band_ov.WriteArray(arr_sub, xOff_sub, yOff_sub)
[docs]def finishSinglePassStats(ds, singlePassMgr, symbolicName, seqNum): """ Finish the single-pass basic statistics for all bands of the given file, and write them into the file. """ accumList = singlePassMgr.accumulators[symbolicName, seqNum] numBands = len(accumList) for i in range(numBands): (minval, maxval, meanval, stddev) = accumList[i].finalStats() if None not in (minval, maxval, meanval, stddev): band = ds.GetRasterBand(i + 1) approx_ok = singlePassMgr.approxOK[symbolicName] writeBasicStats(band, minval, maxval, meanval, stddev, approx_ok)
[docs]def finishSinglePassHistogram(ds, singlePassMgr, symbolicName, seqNum): """ Finish the single-pass histogram, and write to file. Also writes the median and mode, which are estimated from the histogram. """ accumList = singlePassMgr.accumulators[symbolicName, seqNum] numBands = len(accumList) for i in range(numBands): accum = accumList[i] (minval, maxval, counts) = accum.fullHist() if minval is not None: band = ds.GetRasterBand(i + 1) histParams = HistogramParams(band, minval, maxval) if ((histParams.binFunction == 'linear') and (len(counts) > histParams.maxLinearBins)): # Our rules dictate that we want a linear bin-Function histogram # so convert the 'direct' one we have desiredNbins = histParams.maxLinearBins counts = linearHistFromDirect(desiredNbins, histParams.step, counts) writeHistogram(ds, band, counts, histParams)
[docs]def writeHistogram(ds, band, hist, histParams): """ Write the given histogram into the band object. Also use the histogram to estimate median and mode, and write them as well. """ ratObj = band.GetDefaultRAT() layerType = band.GetMetadataItem('LAYER_TYPE') thematic = (layerType == "thematic") # The GDAL HFA driver has a bug in its SetLinearBinning function, # which was introduced as part of the RFC40 changes. Until # this is fixed and widely distributed, we should disable the use # of RFC40-style techniques for HFA files. driverName = ds.GetDriver().ShortName disableRFC40 = (driverName == 'HFA') if thematic and ratObj is not None and not disableRFC40: histIndx, histNew = findOrCreateColumn(ratObj, gdal.GFU_PixelCount, "Histogram", gdal.GFT_Real) # Write the hist in a single go. Note that this only works because we # have forced histParams.min to be zero, which is why we only # do it this way for thematic cases. For other cases, the use of # the RAT Histogram column is questionable. ratObj.SetRowCount(histParams.nbins) ratObj.WriteArray(hist, histIndx) ratObj.SetLinearBinning(histParams.min, (histParams.calcMax - histParams.calcMin) / histParams.nbins) else: # Use GDAL's original metadata interface, for drivers which # don't support the more modern approach band.SetMetadataItem("STATISTICS_HISTOBINVALUES", '|'.join(map(str, hist)) + '|') band.SetMetadataItem("STATISTICS_HISTOMIN", repr(histParams.min)) band.SetMetadataItem("STATISTICS_HISTOMAX", repr(histParams.max)) band.SetMetadataItem("STATISTICS_HISTONUMBINS", repr(int(histParams.nbins))) band.SetMetadataItem("STATISTICS_HISTOBINFUNCTION", histParams.binFunction) # estimate the median - bin with the middle number middlenum = hist.astype(numpy.int64).sum() / 2 gtmiddle = hist.astype(numpy.int64).cumsum() >= middlenum medianbin = gtmiddle.nonzero()[0][0] medianval = medianbin * histParams.step + histParams.min if band.DataType in gdalFloatTypes: band.SetMetadataItem("STATISTICS_MEDIAN", repr(float(medianval))) else: band.SetMetadataItem("STATISTICS_MEDIAN", repr(int(round(medianval)))) # do the mode - bin with the highest count modebin = numpy.argmax(hist) modeval = modebin * histParams.step + histParams.min if band.DataType in gdalFloatTypes: band.SetMetadataItem("STATISTICS_MODE", repr(float(modeval))) else: band.SetMetadataItem("STATISTICS_MODE", repr(int(round(modeval)))) if ratObj is not None and not ratObj.ChangesAreWrittenToFile(): # For drivers that require the in memory thing band.SetDefaultRAT(ratObj)
[docs]def linearHistFromDirect(desiredNbins, step, counts): """ Take a direct-binFunction histogram and re-bin it to create a linear-binFunction equivalent. This is intended for use with counts created with the single-pass algorithm, for the cases when we would otherwise have chosen a linear-binFunction histogram. Generally this is to save writing a very large number of counts. The minval and maxval will be preserved. The given desiredNbins is the number of bins desired in the new histogram. The counts are the old counts, and will be re-calculated with the requested number of bins. We preserve the total count, so the new histogram refers to the same total number of pixels. """ if desiredNbins > len(counts): msg = "{} > {}. Cannot increase the number of bins".format(desiredNbins, len(counts)) raise SinglePassActionsError(msg) newCounts = numpy.zeros(desiredNbins, dtype=counts.dtype) upper = 0 for i in range(desiredNbins): lower = upper upper = (i + 1) * step # Accumulate the count for all bins intersecting this new bin j1 = int(lower) j2 = int(upper) if (i + 1) == desiredNbins: j2 += 1 newCounts[i] = counts[j1:j2].sum() return newCounts