From b6a4197d5c7aee99650be0d4a768e1914101fcae Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 14 Mar 2024 13:28:26 +0100 Subject: [PATCH] ZMap: support reading variant of format where there is no newline character at end of column --- autotest/gdrivers/zmap.py | 23 +++ frmts/zmap/zmapdataset.cpp | 357 ++++++++++++++++++++----------------- 2 files changed, 221 insertions(+), 159 deletions(-) diff --git a/autotest/gdrivers/zmap.py b/autotest/gdrivers/zmap.py index 9d39f1994acc..e7d62f876cf8 100755 --- a/autotest/gdrivers/zmap.py +++ b/autotest/gdrivers/zmap.py @@ -65,3 +65,26 @@ def test_zmap_nodata(): ) ds = None gdal.GetDriverByName("ZMap").Delete(filename) + + +############################################################################### +# Test variant of the format where there is no flush at end of column + + +def test_zmap_no_flush_end_of_column(tmp_path): + + src_ds = gdal.GetDriverByName("MEM").Create("", 2, 5) + src_ds.WriteRaster(0, 0, 2, 5, b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09") + filename = str(tmp_path / "out.zmap") + with gdaltest.config_option("ZMAP_EMIT_EOL_AT_END_OF_COLUMN", "NO"): + gdal.GetDriverByName("ZMap").CreateCopy(filename, src_ds) + f = gdal.VSIFOpenL(filename, "rb") + assert f + data = gdal.VSIFReadL(1, 1000, f) + gdal.VSIFCloseL(f) + assert ( + data + == b"!\n! Created by GDAL.\n!\n@GRID FILE, GRID, 4\n 20, 1E+30, , 7, 1\n 5, 2, 0.0000000, 2.0000000, -5.0000000, 0.0000000\n0.0, 0.0, 0.0\n@\n 0.0000000 2.0000000 4.0000000 6.0000000\n 8.0000000 1.0000000 3.0000000 5.0000000\n 7.0000000 9.0000000\n" + ) + ds = gdal.Open(filename) + assert ds.ReadRaster(buf_type=gdal.GDT_Byte) == src_ds.ReadRaster() diff --git a/frmts/zmap/zmapdataset.cpp b/frmts/zmap/zmapdataset.cpp index f5c6887c5fd3..377542a67255 100644 --- a/frmts/zmap/zmapdataset.cpp +++ b/frmts/zmap/zmapdataset.cpp @@ -31,7 +31,9 @@ #include "gdal_frmts.h" #include "gdal_pam.h" +#include #include +#include /************************************************************************/ /* ==================================================================== */ @@ -45,14 +47,17 @@ class ZMapDataset final : public GDALPamDataset { friend class ZMapRasterBand; - VSILFILE *fp; - int nValuesPerLine; - int nFieldSize; - int nDecimalCount; - int nColNum; - double dfNoDataValue; - vsi_l_offset nDataStartOff; - double adfGeoTransform[6]; + VSIVirtualHandleUniquePtr m_fp{}; + int m_nValuesPerLine = 0; + int m_nFieldSize = 0; + int m_nDecimalCount = 0; + int m_nColNum = -1; + double m_dfNoDataValue = 0; + vsi_l_offset m_nDataStartOff = 0; + std::array m_adfGeoTransform = {{0, 1, 0, 0, 0, 1}}; + int m_nFirstDataLine = 0; + int m_nCurLine = 0; + std::deque m_odfQueue{}; public: ZMapDataset(); @@ -98,6 +103,8 @@ ZMapRasterBand::ZMapRasterBand(ZMapDataset *poDSIn) eDataType = GDT_Float64; + // The format is column oriented! That is we have first the value of + // pixel (col=0, line=0), then the one of (col=0, line=1), etc. nBlockXSize = 1; nBlockYSize = poDSIn->GetRasterYSize(); } @@ -109,56 +116,97 @@ ZMapRasterBand::ZMapRasterBand(ZMapDataset *poDSIn) CPLErr ZMapRasterBand::IReadBlock(int nBlockXOff, CPL_UNUSED int nBlockYOff, void *pImage) { - ZMapDataset *poGDS = reinterpret_cast(poDS); - - if (poGDS->fp == nullptr) - return CE_Failure; + ZMapDataset *poGDS = cpl::down_cast(poDS); - if (nBlockXOff < poGDS->nColNum + 1) + // If seeking backwards in term of columns, reset reading to the first + // column + if (nBlockXOff < poGDS->m_nColNum + 1) { - VSIFSeekL(poGDS->fp, poGDS->nDataStartOff, SEEK_SET); - poGDS->nColNum = -1; + poGDS->m_fp->Seek(poGDS->m_nDataStartOff, SEEK_SET); + poGDS->m_nColNum = -1; + poGDS->m_nCurLine = poGDS->m_nFirstDataLine; + poGDS->m_odfQueue.clear(); } - if (nBlockXOff > poGDS->nColNum + 1) + if (nBlockXOff > poGDS->m_nColNum + 1) { - for (int i = poGDS->nColNum + 1; i < nBlockXOff; i++) + for (int i = poGDS->m_nColNum + 1; i < nBlockXOff; i++) { - if (IReadBlock(i, 0, pImage) != CE_None) + if (IReadBlock(i, 0, nullptr) != CE_None) return CE_Failure; } } - int i = 0; - const double dfExp = std::pow(10.0, poGDS->nDecimalCount); - while (i < nRasterYSize) + int iRow = 0; + const double dfExp = std::pow(10.0, poGDS->m_nDecimalCount); + double *padfImage = reinterpret_cast(pImage); + + // If we have previously read too many values, start by consuming the + // queue + while (iRow < nRasterYSize && !poGDS->m_odfQueue.empty()) + { + if (padfImage) + padfImage[iRow] = poGDS->m_odfQueue.front(); + ++iRow; + poGDS->m_odfQueue.pop_front(); + } + + // Now read as many lines as needed to finish filling the column buffer + while (iRow < nRasterYSize) { - char *pszLine = const_cast(CPLReadLineL(poGDS->fp)); + constexpr int MARGIN = 16; // Should be at least 2 for \r\n + char *pszLine = const_cast(CPLReadLine2L( + poGDS->m_fp.get(), + poGDS->m_nValuesPerLine * poGDS->m_nFieldSize + MARGIN, nullptr)); + ++poGDS->m_nCurLine; if (pszLine == nullptr) return CE_Failure; - int nExpected = nRasterYSize - i; - if (nExpected > poGDS->nValuesPerLine) - nExpected = poGDS->nValuesPerLine; - if (static_cast(strlen(pszLine)) != nExpected * poGDS->nFieldSize) + + // Each line should have at most m_nValuesPerLine values of size + // m_nFieldSize + const int nLineLen = static_cast(strlen(pszLine)); + if ((nLineLen % poGDS->m_nFieldSize) != 0) + { + CPLError(CE_Failure, CPLE_AppDefined, + "Line %d has length %d, which is not a multiple of %d", + poGDS->m_nCurLine, nLineLen, poGDS->m_nFieldSize); return CE_Failure; + } - for (int j = 0; j < nExpected; j++) + const int nValuesThisLine = nLineLen / poGDS->m_nFieldSize; + if (nValuesThisLine > poGDS->m_nValuesPerLine) { - char *pszValue = pszLine + j * poGDS->nFieldSize; - const char chSaved = pszValue[poGDS->nFieldSize]; - pszValue[poGDS->nFieldSize] = 0; - if (strchr(pszValue, '.') != nullptr) - reinterpret_cast(pImage)[i + j] = CPLAtofM(pszValue); - else - reinterpret_cast(pImage)[i + j] = - atoi(pszValue) * dfExp; - pszValue[poGDS->nFieldSize] = chSaved; + CPLError( + CE_Failure, CPLE_AppDefined, + "Line %d has %d values, whereas the maximum expected is %d", + poGDS->m_nCurLine, nValuesThisLine, poGDS->m_nValuesPerLine); + return CE_Failure; } - i += nExpected; + for (int iValueThisLine = 0; iValueThisLine < nValuesThisLine; + iValueThisLine++) + { + char *pszValue = pszLine + iValueThisLine * poGDS->m_nFieldSize; + const char chSaved = pszValue[poGDS->m_nFieldSize]; + pszValue[poGDS->m_nFieldSize] = 0; + const double dfVal = strchr(pszValue, '.') != nullptr + ? CPLAtofM(pszValue) + : atoi(pszValue) * dfExp; + pszValue[poGDS->m_nFieldSize] = chSaved; + if (iRow < nRasterYSize) + { + if (padfImage) + padfImage[iRow] = dfVal; + ++iRow; + } + else + { + poGDS->m_odfQueue.push_back(dfVal); + } + } } - poGDS->nColNum++; + poGDS->m_nColNum++; return CE_None; } @@ -169,29 +217,19 @@ CPLErr ZMapRasterBand::IReadBlock(int nBlockXOff, CPL_UNUSED int nBlockYOff, double ZMapRasterBand::GetNoDataValue(int *pbSuccess) { - ZMapDataset *poGDS = reinterpret_cast(poDS); + ZMapDataset *poGDS = cpl::down_cast(poDS); if (pbSuccess) *pbSuccess = TRUE; - return poGDS->dfNoDataValue; + return poGDS->m_dfNoDataValue; } /************************************************************************/ /* ~ZMapDataset() */ /************************************************************************/ -ZMapDataset::ZMapDataset() - : fp(nullptr), nValuesPerLine(0), nFieldSize(0), nDecimalCount(0), - nColNum(-1), dfNoDataValue(0.0), nDataStartOff(0) -{ - adfGeoTransform[0] = 0; - adfGeoTransform[1] = 1; - adfGeoTransform[2] = 0; - adfGeoTransform[3] = 0; - adfGeoTransform[4] = 0; - adfGeoTransform[5] = 1; -} +ZMapDataset::ZMapDataset() = default; /************************************************************************/ /* ~ZMapDataset() */ @@ -201,8 +239,6 @@ ZMapDataset::~ZMapDataset() { FlushCache(true); - if (fp) - VSIFCloseL(fp); } /************************************************************************/ @@ -243,25 +279,17 @@ int ZMapDataset::Identify(GDALOpenInfo *poOpenInfo) return FALSE; i++; - char **papszTokens = CSLTokenizeString2(pszData + i, ",", 0); - if (CSLCount(papszTokens) < 3) + const CPLStringList aosTokens(CSLTokenizeString2(pszData + i, ",", 0)); + if (aosTokens.size() < 3) { - CSLDestroy(papszTokens); return FALSE; } - const char *pszToken = papszTokens[1]; + const char *pszToken = aosTokens[1]; while (*pszToken == ' ') pszToken++; - if (!STARTS_WITH(pszToken, "GRID")) - { - CSLDestroy(papszTokens); - return FALSE; - } - - CSLDestroy(papszTokens); - return TRUE; + return STARTS_WITH(pszToken, "GRID"); } /************************************************************************/ @@ -284,14 +312,22 @@ GDALDataset *ZMapDataset::Open(GDALOpenInfo *poOpenInfo) " datasets."); return nullptr; } + + auto poDS = std::make_unique(); + poDS->m_fp.reset(poOpenInfo->fpL); + poOpenInfo->fpL = nullptr; + /* -------------------------------------------------------------------- */ /* Find dataset characteristics */ /* -------------------------------------------------------------------- */ const char *pszLine; - - while ((pszLine = CPLReadLine2L(poOpenInfo->fpL, 100, nullptr)) != nullptr) + int nLine = 0; + constexpr int MAX_HEADER_LINE = 1024; + while ((pszLine = CPLReadLine2L(poDS->m_fp.get(), MAX_HEADER_LINE, + nullptr)) != nullptr) { + ++nLine; if (*pszLine == '!') { continue; @@ -302,160 +338,155 @@ GDALDataset *ZMapDataset::Open(GDALOpenInfo *poOpenInfo) // cppcheck-suppress knownConditionTrueFalse if (pszLine == nullptr) { - VSIFCloseL(poOpenInfo->fpL); - poOpenInfo->fpL = nullptr; return nullptr; } /* Parse first header line */ - char **papszTokens = CSLTokenizeString2(pszLine, ",", 0); - if (CSLCount(papszTokens) != 3) + CPLStringList aosTokensFirstLine(CSLTokenizeString2(pszLine, ",", 0)); + if (aosTokensFirstLine.size() != 3) { - CSLDestroy(papszTokens); - VSIFCloseL(poOpenInfo->fpL); - poOpenInfo->fpL = nullptr; return nullptr; } - const int nValuesPerLine = atoi(papszTokens[2]); + const int nValuesPerLine = atoi(aosTokensFirstLine[2]); if (nValuesPerLine <= 0) { - CSLDestroy(papszTokens); - VSIFCloseL(poOpenInfo->fpL); - poOpenInfo->fpL = nullptr; + CPLError(CE_Failure, CPLE_AppDefined, + "Invalid/unsupported value for nValuesPerLine = %d", + nValuesPerLine); return nullptr; } - CSLDestroy(papszTokens); - papszTokens = nullptr; - /* Parse second header line */ - pszLine = CPLReadLine2L(poOpenInfo->fpL, 100, nullptr); + pszLine = CPLReadLine2L(poDS->m_fp.get(), MAX_HEADER_LINE, nullptr); + ++nLine; if (pszLine == nullptr) { - VSIFCloseL(poOpenInfo->fpL); - poOpenInfo->fpL = nullptr; return nullptr; } - papszTokens = CSLTokenizeString2(pszLine, ",", 0); - if (CSLCount(papszTokens) != 5) + const CPLStringList aosTokensSecondLine( + CSLTokenizeString2(pszLine, ",", 0)); + if (aosTokensSecondLine.size() != 5) + { + return nullptr; + } + + const int nFieldSize = atoi(aosTokensSecondLine[0]); + const double dfNoDataValue = CPLAtofM(aosTokensSecondLine[1]); + const int nDecimalCount = atoi(aosTokensSecondLine[3]); + const int nColumnNumber = atoi(aosTokensSecondLine[4]); + + if (nFieldSize <= 0 || nFieldSize >= 40) { - CSLDestroy(papszTokens); - VSIFCloseL(poOpenInfo->fpL); - poOpenInfo->fpL = nullptr; + CPLError(CE_Failure, CPLE_AppDefined, + "Invalid/unsupported value for nFieldSize = %d", nFieldSize); return nullptr; } - const int nFieldSize = atoi(papszTokens[0]); - const double dfNoDataValue = CPLAtofM(papszTokens[1]); - const int nDecimalCount = atoi(papszTokens[3]); - const int nColumnNumber = atoi(papszTokens[4]); + if (nDecimalCount <= 0 || nDecimalCount >= nFieldSize) + { + CPLError(CE_Failure, CPLE_AppDefined, + "Invalid/unsupported value for nDecimalCount = %d", + nDecimalCount); + return nullptr; + } - CSLDestroy(papszTokens); - papszTokens = nullptr; + if (nColumnNumber != 1) + { + CPLError(CE_Failure, CPLE_AppDefined, + "Invalid/unsupported value for nColumnNumber = %d", + nColumnNumber); + return nullptr; + } - if (nFieldSize <= 0 || nFieldSize >= 40 || nDecimalCount <= 0 || - nDecimalCount >= nFieldSize || nColumnNumber != 1) + if (nValuesPerLine <= 0 || nFieldSize > 1024 * 1024 / nValuesPerLine) { - CPLDebug("ZMap", "nFieldSize=%d, nDecimalCount=%d, nColumnNumber=%d", - nFieldSize, nDecimalCount, nColumnNumber); - VSIFCloseL(poOpenInfo->fpL); - poOpenInfo->fpL = nullptr; + CPLError(CE_Failure, CPLE_AppDefined, + "Invalid/unsupported value for nFieldSize = %d x " + "nValuesPerLine = %d", + nFieldSize, nValuesPerLine); return nullptr; } /* Parse third header line */ - pszLine = CPLReadLine2L(poOpenInfo->fpL, 100, nullptr); + pszLine = CPLReadLine2L(poDS->m_fp.get(), MAX_HEADER_LINE, nullptr); + ++nLine; if (pszLine == nullptr) { - VSIFCloseL(poOpenInfo->fpL); - poOpenInfo->fpL = nullptr; return nullptr; } - papszTokens = CSLTokenizeString2(pszLine, ",", 0); - if (CSLCount(papszTokens) != 6) + const CPLStringList aosTokensThirdLine(CSLTokenizeString2(pszLine, ",", 0)); + if (aosTokensThirdLine.size() != 6) { - CSLDestroy(papszTokens); - VSIFCloseL(poOpenInfo->fpL); - poOpenInfo->fpL = nullptr; return nullptr; } - const int nRows = atoi(papszTokens[0]); - const int nCols = atoi(papszTokens[1]); - const double dfMinX = CPLAtofM(papszTokens[2]); - const double dfMaxX = CPLAtofM(papszTokens[3]); - const double dfMinY = CPLAtofM(papszTokens[4]); - const double dfMaxY = CPLAtofM(papszTokens[5]); - - CSLDestroy(papszTokens); - papszTokens = nullptr; + const int nRows = atoi(aosTokensThirdLine[0]); + const int nCols = atoi(aosTokensThirdLine[1]); + const double dfMinX = CPLAtofM(aosTokensThirdLine[2]); + const double dfMaxX = CPLAtofM(aosTokensThirdLine[3]); + const double dfMinY = CPLAtofM(aosTokensThirdLine[4]); + const double dfMaxY = CPLAtofM(aosTokensThirdLine[5]); if (!GDALCheckDatasetDimensions(nCols, nRows) || nCols == 1 || nRows == 1) { - VSIFCloseL(poOpenInfo->fpL); - poOpenInfo->fpL = nullptr; return nullptr; } /* Ignore fourth header line */ - pszLine = CPLReadLine2L(poOpenInfo->fpL, 100, nullptr); + pszLine = CPLReadLine2L(poDS->m_fp.get(), MAX_HEADER_LINE, nullptr); + ++nLine; if (pszLine == nullptr) { - VSIFCloseL(poOpenInfo->fpL); - poOpenInfo->fpL = nullptr; return nullptr; } /* Check fifth header line */ - pszLine = CPLReadLine2L(poOpenInfo->fpL, 100, nullptr); + pszLine = CPLReadLine2L(poDS->m_fp.get(), MAX_HEADER_LINE, nullptr); + ++nLine; if (pszLine == nullptr || pszLine[0] != '@') { - VSIFCloseL(poOpenInfo->fpL); - poOpenInfo->fpL = nullptr; return nullptr; } /* -------------------------------------------------------------------- */ /* Create a corresponding GDALDataset. */ /* -------------------------------------------------------------------- */ - ZMapDataset *poDS = new ZMapDataset(); - poDS->fp = poOpenInfo->fpL; - poOpenInfo->fpL = nullptr; - poDS->nDataStartOff = VSIFTellL(poDS->fp); - poDS->nValuesPerLine = nValuesPerLine; - poDS->nFieldSize = nFieldSize; - poDS->nDecimalCount = nDecimalCount; + poDS->m_nDataStartOff = VSIFTellL(poDS->m_fp.get()); + poDS->m_nValuesPerLine = nValuesPerLine; + poDS->m_nFieldSize = nFieldSize; + poDS->m_nDecimalCount = nDecimalCount; poDS->nRasterXSize = nCols; poDS->nRasterYSize = nRows; - poDS->dfNoDataValue = dfNoDataValue; + poDS->m_dfNoDataValue = dfNoDataValue; + poDS->m_nFirstDataLine = nLine; if (CPLTestBool(CPLGetConfigOption("ZMAP_PIXEL_IS_POINT", "FALSE"))) { const double dfStepX = (dfMaxX - dfMinX) / (nCols - 1); const double dfStepY = (dfMaxY - dfMinY) / (nRows - 1); - poDS->adfGeoTransform[0] = dfMinX - dfStepX / 2; - poDS->adfGeoTransform[1] = dfStepX; - poDS->adfGeoTransform[3] = dfMaxY + dfStepY / 2; - poDS->adfGeoTransform[5] = -dfStepY; + poDS->m_adfGeoTransform[0] = dfMinX - dfStepX / 2; + poDS->m_adfGeoTransform[1] = dfStepX; + poDS->m_adfGeoTransform[3] = dfMaxY + dfStepY / 2; + poDS->m_adfGeoTransform[5] = -dfStepY; } else { const double dfStepX = (dfMaxX - dfMinX) / nCols; const double dfStepY = (dfMaxY - dfMinY) / nRows; - poDS->adfGeoTransform[0] = dfMinX; - poDS->adfGeoTransform[1] = dfStepX; - poDS->adfGeoTransform[3] = dfMaxY; - poDS->adfGeoTransform[5] = -dfStepY; + poDS->m_adfGeoTransform[0] = dfMinX; + poDS->m_adfGeoTransform[1] = dfStepX; + poDS->m_adfGeoTransform[3] = dfMaxY; + poDS->m_adfGeoTransform[5] = -dfStepY; } /* -------------------------------------------------------------------- */ /* Create band information objects. */ /* -------------------------------------------------------------------- */ poDS->nBands = 1; - poDS->SetBand(1, new ZMapRasterBand(poDS)); + poDS->SetBand(1, new ZMapRasterBand(poDS.get())); /* -------------------------------------------------------------------- */ /* Initialize any PAM information. */ @@ -466,8 +497,8 @@ GDALDataset *ZMapDataset::Open(GDALOpenInfo *poOpenInfo) /* -------------------------------------------------------------------- */ /* Support overviews. */ /* -------------------------------------------------------------------- */ - poDS->oOvManager.Initialize(poDS, poOpenInfo->pszFilename); - return poDS; + poDS->oOvManager.Initialize(poDS.get(), poOpenInfo->pszFilename); + return poDS.release(); } /************************************************************************/ @@ -651,42 +682,50 @@ GDALDataset *ZMapDataset::CreateCopy(const char *pszFilename, /* -------------------------------------------------------------------- */ /* Copy imagery */ /* -------------------------------------------------------------------- */ - double *padfLineBuffer = - reinterpret_cast(CPLMalloc(nYSize * sizeof(double))); + std::vector adfLineBuffer(nYSize); CPLErr eErr = CE_None; + const bool bEmitEOLAtEndOfColumn = CPLTestBool( + CPLGetConfigOption("ZMAP_EMIT_EOL_AT_END_OF_COLUMN", "YES")); + bool bEOLPrinted = false; + int nValuesThisLine = 0; for (int i = 0; i < nXSize && eErr == CE_None; i++) { - eErr = poSrcDS->GetRasterBand(1)->RasterIO(GF_Read, i, 0, 1, nYSize, - padfLineBuffer, 1, nYSize, - GDT_Float64, 0, 0, nullptr); + eErr = poSrcDS->GetRasterBand(1)->RasterIO( + GF_Read, i, 0, 1, nYSize, adfLineBuffer.data(), 1, nYSize, + GDT_Float64, 0, 0, nullptr); if (eErr != CE_None) break; - bool bEOLPrinted = false; - int j = 0; - for (; j < nYSize; j++) + for (int j = 0; j < nYSize; j++) { - WriteRightJustified(fp, padfLineBuffer[j], nFieldSize, + WriteRightJustified(fp, adfLineBuffer[j], nFieldSize, nDecimalCount); - if (((j + 1) % nValuesPerLine) == 0) + ++nValuesThisLine; + if (nValuesThisLine == nValuesPerLine) { bEOLPrinted = true; + nValuesThisLine = 0; VSIFPrintfL(fp, "\n"); } else bEOLPrinted = false; } - if (!bEOLPrinted) + if (bEmitEOLAtEndOfColumn && !bEOLPrinted) + { + bEOLPrinted = true; + nValuesThisLine = 0; VSIFPrintfL(fp, "\n"); + } if (pfnProgress != nullptr && - !pfnProgress((j + 1) * 1.0 / nYSize, nullptr, pProgressData)) + !pfnProgress((i + 1) * 1.0 / nXSize, nullptr, pProgressData)) { eErr = CE_Failure; break; } } - CPLFree(padfLineBuffer); + if (!bEOLPrinted) + VSIFPrintfL(fp, "\n"); VSIFCloseL(fp); if (eErr != CE_None) @@ -702,7 +741,7 @@ GDALDataset *ZMapDataset::CreateCopy(const char *pszFilename, CPLErr ZMapDataset::GetGeoTransform(double *padfTransform) { - memcpy(padfTransform, adfGeoTransform, 6 * sizeof(double)); + memcpy(padfTransform, m_adfGeoTransform.data(), 6 * sizeof(double)); return CE_None; }