Allows CalibratorZaga to create prob of precip field

metno · Jun 22, 2015 · d6aaaf5 · d6aaaf5
1 parent 92463e7
commit d6aaaf5
Show file tree

Hide file tree

Showing 6 changed files with 268 additions and 78 deletions.
diff --git a/DEVELOPERS.rst b/DEVELOPERS.rst
@@ -36,6 +36,11 @@ Debugging
 gridpp can be compiled with debugging symbols by executing make gridpp_debug. The executable gridpp_debug can
 then be used by a debugger such as gdb.
 
+Parallelization
+---------------
+Add openMP directives where it makes sense to parallelize. Ensure that any data retrival calls (e.g.
+calls to File::getField) are made before the directives otherwise a runtime error occurs.
+
 Testing of code
 ---------------
 All test code is placed in src/Testing. Use one file for each class. To test your newly added class

diff --git a/src/Calibrator/Calibrator.cpp b/src/Calibrator/Calibrator.cpp
@@ -22,13 +22,8 @@ Calibrator* Calibrator::getScheme(std::string iName, const Options& iOptions) {
       if(!iOptions.getValue("variable", variable)) {
          Util::error("Calibrator 'zaga' needs variable");
       }
-      CalibratorZaga* c = new CalibratorZaga(parFile, Variable::getType(variable));
+      CalibratorZaga* c = new CalibratorZaga(parFile, Variable::getType(variable), iOptions);
 
-      // Optional settings
-      float fracThreshold;
-      if(iOptions.getValue("fracThreshold", fracThreshold)) {
-         c->setFracThreshold(fracThreshold);
-      }
       return c;
    }
    else if(iName == "cloud") {

diff --git a/src/Calibrator/Zaga.cpp b/src/Calibrator/Zaga.cpp
@@ -6,12 +6,28 @@
 #include "../File/File.h"
 #include "../ParameterFile.h"
 #include "../Parameters.h"
-const float CalibratorZaga::mMaxEnsMean = 100;
-CalibratorZaga::CalibratorZaga(const ParameterFile* iParameterFile, Variable::Type iMainPredictor):
+CalibratorZaga::CalibratorZaga(const ParameterFile* iParameterFile, Variable::Type iMainPredictor, const Options& iOptions):
       Calibrator(),
       mParameterFile(iParameterFile),
       mMainPredictor(iMainPredictor),
-      mFracThreshold(0.5) {
+      mFracThreshold(0.5),
+      mOutputPop(false),
+      mNeighbourhoodSize(0),
+      mPopThreshold(0.5),
+      mMaxEnsMean(100) {
+   iOptions.getValue("fracThreshold", mFracThreshold);
+   iOptions.getValue("outputPop", mOutputPop);
+   iOptions.getValue("neighbourhoodSize", mNeighbourhoodSize);
+   iOptions.getValue("popThreshold", mPopThreshold);
+   iOptions.getValue("maxEnsMean", mMaxEnsMean);
+   if(!Util::isValid(mMaxEnsMean) || mMaxEnsMean <= 0) {
+      Util::error("CalibratorZaga maxEnsMean must be > 0");
+   }
+   if(mNeighbourhoodSize < 0) {
+      std::stringstream ss;
+      ss << "CalibratorZaga neighbourhoodSize (" << mNeighbourhoodSize << ") must be >= 0";
+      Util::error(ss.str());
+   }
 }
 
 bool CalibratorZaga::calibrateCore(File& iFile) const {
@@ -28,6 +44,11 @@ bool CalibratorZaga::calibrateCore(File& iFile) const {
       Parameters parameters = mParameterFile->getParameters(t);
       Field& precip = *iFile.getField(Variable::Precip, t);
 
+      FieldPtr pop;
+      if(mOutputPop) {
+         pop = iFile.getField(Variable::Pop, t);
+      }
+
       #pragma omp parallel for
       for(int i = 0; i < nLat; i++) {
          for(int j = 0; j < nLon; j++) {
@@ -41,47 +62,72 @@ bool CalibratorZaga::calibrateCore(File& iFile) const {
             // Check if current ensemble for this gridpoint/time has any missing
             // values. If so, don't calibrate the ensemble.
             for(int e = 0; e < nEns; e++) {
-               float value = precipRaw[e];
-               if(!Util::isValid(value)) {
-                  isValid = false;
-                  break;
+               if(mNeighbourhoodSize == 0) {
+                  float value = precip(i,j,e);
+                  if(!Util::isValid(value)) {
+                     isValid = false;
+                     break;
+                  }
+                  ensMean += value;
+                  ensFrac += (value <= mFracThreshold);
+                  counter++;
+               }
+               else {
+                  for(int ii = std::max(0, i-mNeighbourhoodSize); ii <= std::min(nLat-1, i+mNeighbourhoodSize); ii++) {
+                     for(int jj = std::max(0, j-mNeighbourhoodSize); jj <= std::min(nLon-1, j+mNeighbourhoodSize); jj++) {
+                        float value = precip(ii,jj,e);
+                        if(Util::isValid(value)) {
+                           ensMean += value;
+                           ensFrac += (value <= mFracThreshold);
+                           counter++;
+                        }
+                     }
+                  }
                }
-               ensMean += value;
-               ensFrac += (value <= mFracThreshold);
-               counter++;
             }
             if(isValid) {
                ensMean = ensMean / counter;
                ensFrac = ensFrac / counter;
 
                // Limit the input to the calibration model to prevent it
                // from creating very extreme values.
-               if(ensMean > mMaxEnsMean) {
+               if(Util::isValid(mMaxEnsMean) && ensMean > mMaxEnsMean) {
                   ensMean = mMaxEnsMean;
                }
 
-               // Calibrate
-               std::vector<std::pair<float,int> > pairs(nEns);
-               std::vector<float> valuesCal(nEns);
-               for(int e = 0; e < nEns; e++) {
-                  float quantile = ((float) e+0.5)/nEns;
-                  float valueCal   = getInvCdf(quantile, ensMean, ensFrac, parameters);
-                  precip(i,j,e) = valueCal;
-                  if(!Util::isValid(valueCal))
-                     isValid = false;
-               }
-               if(isValid) {
-                  std::vector<float> precipCal = precip(i,j);
-                  Calibrator::shuffle(precipRaw, precipCal);
+               if(mOutputPop) {
                   for(int e = 0; e < nEns; e++) {
-                     precip(i,j,e) = precipCal[e];
+                     float cdf = getCdf(mPopThreshold, ensMean, ensFrac, parameters);
+                     if(Util::isValid(cdf))
+                        (*pop)(i,j,e) = 1 - cdf;
+                     else
+                        (*pop)(i,j,e) = Util::MV;
                   }
                }
                else {
-                  numInvalidCal++;
-                  // Calibrator produced some invalid members. Revert to the raw values.
+                  // Calibrate
+                  std::vector<std::pair<float,int> > pairs(nEns);
+                  std::vector<float> valuesCal(nEns);
                   for(int e = 0; e < nEns; e++) {
-                     precip(i,j,e) = precipRaw[e];
+                     float quantile = ((float) e+0.5)/nEns;
+                     float valueCal   = getInvCdf(quantile, ensMean, ensFrac, parameters);
+                     precip(i,j,e) = valueCal;
+                     if(!Util::isValid(valueCal))
+                        isValid = false;
+                  }
+                  if(isValid) {
+                     std::vector<float> precipCal = precip(i,j);
+                     Calibrator::shuffle(precipRaw, precipCal);
+                     for(int e = 0; e < nEns; e++) {
+                        precip(i,j,e) = precipCal[e];
+                     }
+                  }
+                  else {
+                     numInvalidCal++;
+                     // Calibrator produced some invalid members. Revert to the raw values.
+                     for(int e = 0; e < nEns; e++) {
+                        precip(i,j,e) = precipRaw[e];
+                     }
                   }
                }
             }
@@ -165,6 +211,59 @@ float CalibratorZaga::getInvCdf(float iQuantile, float iEnsMean, float iEnsFrac,
       return Util::MV;
    return value;
 }
+float CalibratorZaga::getCdf(float iThreshold, float iEnsMean, float iEnsFrac, Parameters& iParameters) {
+   if(!Util::isValid(iThreshold) || !Util::isValid(iEnsMean) || !Util::isValid(iEnsFrac))
+      return Util::MV;
+
+   if(iEnsMean < 0 || iEnsFrac < 0 || iEnsFrac > 1)
+      return Util::MV;
+
+   if(iThreshold < 0)
+      return 0;
+
+   // Check that parameters are valid
+   for(int i =0; i < iParameters.size(); i++) {
+      if(!Util::isValid(iParameters[i]))
+         return Util::MV;
+   }
+
+   // Check if we are in the discrete mass
+   float P0 = getP0(iEnsMean, iEnsFrac, iParameters);
+   if(!Util::isValid(P0))
+      return Util::MV;
+   if(iThreshold == 0)
+      return P0;
+
+   float mua = iParameters[0];
+   float mub = iParameters[1];
+   float sa  = iParameters[2];
+   float sb  = iParameters[3];
+
+   // Compute parameters of distribution (in same way as done in gamlss in R)
+   float mu    = exp(mua + mub * pow(iEnsMean, 1.0/3));
+   float sigma = exp(sa + sb * iEnsMean);
+
+   if(mu <= 0 || sigma <= 0)
+      return Util::MV;
+   if(!Util::isValid(mu) || !Util::isValid(sigma))
+      return Util::MV;
+
+   // Parameters in boost and wikipedia
+   float shape = 1/(sigma*sigma); // k
+   float scale = sigma*sigma*mu;  // theta
+   if(!Util::isValid(scale) || !Util::isValid(shape))
+      return Util::MV;
+
+   // std::cout << mu << " " << sigma << " " << P0 << " " << shape << " " << scale << std::endl;
+   boost::math::gamma_distribution<> dist(shape, scale);
+   float contCdf = boost::math::cdf(dist, iThreshold) ;
+   float cdf = P0 + (1 - P0)*contCdf;
+   if(!Util::isValid(cdf))
+      return Util::MV;
+   assert(cdf <= 1);
+   assert(cdf >= 0);
+   return cdf;
+}
 float CalibratorZaga::getP0(float iEnsMean, float iEnsFrac, Parameters& iParameters) {
    if(!Util::isValid(iEnsMean) || !Util::isValid(iEnsFrac) || iEnsMean < 0 || iEnsFrac < 0 || iEnsFrac > 1)
       return Util::MV;
@@ -181,14 +280,6 @@ float CalibratorZaga::getP0(float iEnsMean, float iEnsFrac, Parameters& iParamet
    float P0 = Util::invLogit(logit);
    return P0;
 }
-void CalibratorZaga::setFracThreshold(float iFraction) {
-   if(!Util::isValid(iFraction) || iFraction < 0) {
-      std::stringstream ss;
-      ss << "CalibratorZaga: fraction threshold (" << iFraction << ") must be 0 or greater.";
-      Util::error(ss.str());
-   }
-   mFracThreshold = iFraction;
-}
 
 std::string CalibratorZaga::description() {
    std::stringstream ss;
@@ -203,5 +294,9 @@ std::string CalibratorZaga::description() {
    ss << Util::formatDescription("", "offsetN a b c d e f g h") << std::endl;
    ss << Util::formatDescription("", "If the file only has a single line, then the same set of parameters are used for all offsets.") << std::endl;
    ss << Util::formatDescription("   fracThreshold=0.5", "Threshold defining precip/no-precip boundary when computing fraction of members with precip.") << std::endl;
+   ss << Util::formatDescription("   neighbourhoodSize=0", "Increase the ensemble by taking all gridpoints within a neighbourhood. A value of 0 means no neighbourhood is used.") << std::endl;
+   ss << Util::formatDescription("   outputPop=0", "Should probability of precip be written to the Pop field?") << std::endl;
+   ss << Util::formatDescription("   popThreshold=0.5", "If Pop is written, what threshold should be used?") << std::endl;
+   ss << Util::formatDescription("   maxEnsMean=100", "Upper limit of what the ensemble mean is allowed to be when passed into the distribution. This effectively prevents the distribution to yield very high values.") << std::endl;
    return ss.str();
 }
diff --git a/src/Calibrator/Zaga.h b/src/Calibrator/Zaga.h
@@ -12,27 +12,34 @@ class Parameters;
 //! Designed for precip
 class CalibratorZaga : public Calibrator {
    public:
-      CalibratorZaga(const ParameterFile* iParameterFile, Variable::Type iMainPredictor);
+      CalibratorZaga(const ParameterFile* iParameterFile, Variable::Type iMainPredictor, const Options& iOptions);
       //! Get probability mass at 0 mm (i.e probability of no precipitation)
       //! If any input has missing values, the end result is missing
       static float getP0(float iEnsMean, float iEnsFrac, Parameters& iParameters);
       //! Get Precipitation amount corresponding to quantile
       //! If any input has missing values, the end result is missing
       static float getInvCdf(float iQuantile, float iEnsMean, float iEnsFrac, Parameters& iParameters);
-      //! Set threshold between precip/no-precip used when computing what fraction of
-      //! members have precip.
-      //! @param must be valid and >= 0
-      void setFracThreshold(float iFraction);
+      //! Get Precipitation amount corresponding to quantile. If any input has missing values, or
+      //! iEnsMean < 0 or iEnsFrac is not in [0,1], a missing value is returned.
+      static float getCdf(float iThreshold, float iEnsMean, float iEnsFrac, Parameters& iParameters);
+
       float getFracThreshold() {return mFracThreshold;};
+      bool  getOutputPop() {return mOutputPop;};
+      float getPopThreshold() {return mPopThreshold;};
+      int   getNeighbourhoodSize() {return mNeighbourhoodSize;};
+      float getMaxEnsMean() {return mMaxEnsMean;};
 
       static std::string description();
       std::string name() const {return "zaga";};
    private:
       const ParameterFile* mParameterFile;
       bool calibrateCore(File& iFile) const;
-      static const float mMaxEnsMean;
       //! What precip threshold should be used to count members with no precip?
       float mFracThreshold;
       Variable::Type mMainPredictor;
+      bool mOutputPop;
+      int  mNeighbourhoodSize;
+      float mPopThreshold;
+      float mMaxEnsMean;
 };
 #endif
diff --git a/src/Testing/Calibrator.cpp b/src/Testing/Calibrator.cpp
@@ -107,12 +107,30 @@ namespace {
       EXPECT_TRUE(vec2[3]==1 || vec2[3]==2);
    }
    TEST_F(TestCalibrator, factoryZaga) {
-      Calibrator* c;
-      c = Calibrator::getScheme("zaga", Options("variable=T parameters=testing/files/parameters.txt fracThreshold=0.9"));
-      EXPECT_TRUE(c);
-      EXPECT_EQ("zaga", c->name());
-      EXPECT_FLOAT_EQ(0.9, ((CalibratorZaga*) c)->getFracThreshold());
-      delete c;
+      {
+         Calibrator* c;
+         c = Calibrator::getScheme("zaga", Options("variable=T parameters=testing/files/parameters.txt popThreshold=0.24 outputPop=1 fracThreshold=0.34 neighbourhoodSize=24 maxEnsMean=90"));
+         EXPECT_TRUE(c);
+         EXPECT_EQ("zaga", c->name());
+         EXPECT_FLOAT_EQ(0.24, ((CalibratorZaga*) c)->getPopThreshold());
+         EXPECT_TRUE(          ((CalibratorZaga*) c)->getOutputPop());
+         EXPECT_FLOAT_EQ(0.34, ((CalibratorZaga*) c)->getFracThreshold());
+         EXPECT_EQ(24,         ((CalibratorZaga*) c)->getNeighbourhoodSize());
+         EXPECT_FLOAT_EQ(90,          ((CalibratorZaga*) c)->getMaxEnsMean());
+         delete c;
+      }
+      {
+         Calibrator* c;
+         c = Calibrator::getScheme("zaga", Options("variable=T parameters=testing/files/parameters.txt popThreshold=-0.12 outputPop=0 fracThreshold=-0.92 neighbourhoodSize=6 maxEnsMean=40"));
+         EXPECT_TRUE(c);
+         EXPECT_EQ("zaga", c->name());
+         EXPECT_FLOAT_EQ(-0.12, ((CalibratorZaga*) c)->getPopThreshold());
+         EXPECT_FALSE(          ((CalibratorZaga*) c)->getOutputPop());
+         EXPECT_FLOAT_EQ(-0.92, ((CalibratorZaga*) c)->getFracThreshold());
+         EXPECT_EQ(6,           ((CalibratorZaga*) c)->getNeighbourhoodSize());
+         EXPECT_FLOAT_EQ(40,          ((CalibratorZaga*) c)->getMaxEnsMean());
+         delete c;
+      }
    }
    TEST_F(TestCalibrator, factoryNeighbourhood) {
       Calibrator* c;