Merge pull request #20 from wrightky/master

Add nourishment functions
passaH2O · Jan 18, 2021 · 4ab8cc7 · 4ab8cc7
2 parents df20373 + 21101cf
commit 4ab8cc7
Show file tree

Hide file tree

Showing 11 changed files with 586 additions and 26 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -1,4 +1,4 @@
-# This workflow installs and tests dorado on mulitple python versions and operating systems.
+# This workflow installs and tests dorado on multiple python versions and operating systems.
 
 name: build
 
@@ -105,6 +105,7 @@ jobs:
         python examples/true_random_walk.py
         python examples/parallel_comparison.py
         python examples/traveltime_straight_channel.py
+        python examples/nourishment_example.py
         cd examples
         python unsteady_example.py
 

diff --git a/docs/source/examples/example14.rst b/docs/source/examples/example14.rst
@@ -0,0 +1,69 @@
+.. _example14:
+
+Example 14 - Nourishment Area and Time Functions
+================================================
+
+In this example, we analyze the travel history of a particle injection by computing the "nourishment area" and "nourishment time" for a given seed location.
+
+Full example script available :download:`here <../../../examples/nourishment_example.py>`.
+
+We will again make use of the DeltaRCM example data used in :ref:`example02`, but in this case we will slightly change the settings used in the random walk. In particular, this time we will include discharge fields, in order to obtain more useful travel time information.
+
+.. doctest::
+
+   >>> import numpy as np
+   >>> import dorado
+   >>> data = np.load('ex_deltarcm_data.npz')
+
+Now we will create the parameter class and assign attributes to it.
+
+.. doctest::
+
+   >>> params = dorado.particle_track.modelParams()
+   >>> params.stage = data['stage']
+   >>> params.depth = data['depth']
+   >>> params.qx = data['qx']
+   >>> params.qy = data['qy']
+   >>> params.dx = 50.
+   >>> params.gamma = 0.5
+   >>> params.model = 'DeltaRCM'
+
+Now that the parameters have all been defined, we will define the particles class and generate a set of particles.
+
+.. doctest::
+
+   >>> seed_xloc = list(range(15, 17))
+   >>> seed_yloc = list(range(137, 140))
+   >>> Np_tracer = 300
+   >>> particles = dorado.particle_track.Particles(params)
+   >>> particles.generate_particles(Np_tracer, seed_xloc, seed_yloc)
+
+Now, we will route the particles for 120 iterations. Notice that we have used more particles and iterations than in :ref:`example02` -- this helps us obtain more statistically representative results later on.
+
+.. doctest::
+
+   >>> walk_data = dorado.routines.steady_plots(particles, 120, 'nourishment_example')
+
+Now that we have an existing `walk_data` dictionary, let's visualize some bulk information about the travel history. First, we can compute which regions of the domain were most visited by particles. This metric is often referred to as the `Nourishment Area` (e.g. Liang et al., 2016, doi.org/10.1002/2015JF003653) of a sample location, as it represents regions which are "nourished" by material from that location. We can compute how frequently cells were visited by particles using the `nourishment_area` function, and visualize the results using the `show_nourishment_area` routine.
+
+.. doctest::
+
+   >>> visit_freq = dorado.particle_track.nourishment_area(walk_data, params.depth.shape)
+   >>> ax = dorado.routines.show_nourishment_area(visit_freq, params.depth, walk_data)
+
+.. image:: images/example14/NourishmentArea.png
+
+The result is visualized in the figure above. Flow depth serves as the background in this figure, on top of which the Nourishment Area is shown, with regions colored by how frequently each cell was occupied by particles. Because we are often interested in where particles can go in general, rather than exactly where they have gone, by default this function will perform a small amount of Gaussian filtering, to smooth out some of the stochasticity in the travel paths. This smoothing can be turned off or ramped up depending on the application. Additionally, the plotting routine comes with many optional settings which can be used to change the aesthetics of the resulting figure.
+
+Because the Nourishment Area is a time-integrated measure of where particles are going, we may also want to know how long particles tend to stay there once they get there. For this question, we have provided a second function, which we are calling the Nourishment Time, which computes how long on average particles spend in each cell they travel through. In steady model runs (such as this one), the result is trivially related to the flow velocity of each cell -- however, for unsteady runs, in which the underlying flow field might be changing in between particle iterations, the results of this function can be more interesting.
+
+Similar to before, we compute this by calling on the `nourishment_time` function, and visualize the results using the `show_nourishment_time` routine.
+
+.. doctest::
+
+   >>> mean_times = dorado.particle_track.nourishment_time(walk_data, params.depth.shape, clip=95)
+   >>> ax = dorado.routines.show_nourishment_time(mean_times, params.depth, walk_data)
+
+.. image:: images/example14/NourishmentTime.png
+
+The result is visualized above. As with the previous function, some amount of spatial filtering has been applied. From comparing these two figures, some interesting trends emerge. For example, even though the main distributary channels are frequently visited by particles, the particles on average don't spend very much time there. Depending on the material or question of interest, perhaps these can provide useful insights!
diff --git a/docs/source/examples/images/example14/NourishmentArea.png b/docs/source/examples/images/example14/NourishmentArea.png
diff --git a/docs/source/examples/images/example14/NourishmentTime.png b/docs/source/examples/images/example14/NourishmentTime.png
diff --git a/docs/source/examples/index.rst b/docs/source/examples/index.rst
@@ -32,6 +32,7 @@ Examples of the high-level API functionality are provided here. These examples r
    example11
    example12
    example13
+   example14
 
 
 External Examples

diff --git a/dorado/__init__.py b/dorado/__init__.py
@@ -1,4 +1,5 @@
-__version__ = "2.3.0"
+__version__ = "2.4.0"
+
 
 from . import lagrangian_walker
 from . import parallel_routing

diff --git a/dorado/particle_track.py b/dorado/particle_track.py
@@ -12,8 +12,6 @@
 from builtins import range
 from math import pi
 import numpy as np
-import scipy
-from scipy import interpolate
 from tqdm import tqdm
 import dorado.lagrangian_walker as lw
 
@@ -901,10 +899,153 @@ def exposure_time(walk_data,
     return exposure_times.tolist()
 
 
+def nourishment_area(walk_data, raster_size, sigma=0.7, clip=99.5):
+    """Determine the nourishment area of a particle injection
+
+    Function will measure the regions of the domain 'fed' by a seed location,
+    as indicated by the history of particle travel locations in walk_data.
+    Returns a heatmap raster, in which values indicate number of occasions
+    each cell was occupied by a particle (after spatial filtering to reduce
+    noise in the random walk, and normalizing by number of particles).
+
+    **Inputs** :
+
+        walk_data : `dict`
+            Dictionary of all prior x locations, y locations, and travel
+            times (the output of run_iteration)
+
+        raster_size : `tuple`
+            Tuple (L,W) of the domain dimensions, i.e. the output of
+            numpy.shape(raster).
+
+        sigma : `float`, optional
+            Degree of spatial smoothing of the area, implemented using a
+            Gaussian kernal of the same sigma, via scipy.ndimage.gaussian_filter
+            Default is light smoothing with sigma = 0.7 (to turn off smoothing,
+            set sigma = 0)
+
+        clip : `float`, optional
+            Percentile at which to truncate the distribution. Particles which
+            get stuck can lead to errors at the high-extreme, so this 
+            parameter is used to normalize by a "near-max". Default is the
+            99.5th percentile. To use true max, specify clip = 100
+
+    **Outputs** :
+
+        visit_freq : `numpy.ndarray`
+            Array of normalized particle visit frequency, with cells in the
+            range [0, 1] representing the number of instances particles visited
+            that cell. If sigma > 0, the array values include spatial filtering
+
+    """
+    if sigma > 0:
+        from scipy.ndimage import gaussian_filter
+
+    # Measure visit frequency
+    visit_freq = np.zeros(raster_size)
+    for ii in list(range(len(walk_data['xinds']))):
+        for jj in list(range(len(walk_data['xinds'][ii]))):
+            visit_freq[walk_data['xinds'][ii][jj],
+                       walk_data['yinds'][ii][jj]] += 1
+
+    # Clip out highest percentile to correct possible outliers
+    vmax = float(np.nanpercentile(visit_freq, clip))
+    visit_freq = np.clip(visit_freq, 0, vmax)
+
+    # If applicable, do smoothing
+    if sigma > 0:
+        visit_freq = gaussian_filter(visit_freq, sigma=sigma)
+        visit_freq[visit_freq==np.min(visit_freq)] = np.nan
+    else:
+        visit_freq[visit_freq==0] = np.nan
+
+    # Normalize to 0-1
+    visit_freq = visit_freq/np.nanmax(visit_freq)
+
+    return visit_freq
+
+
+def nourishment_time(walk_data, raster_size, sigma=0.7, clip=99.5):
+    """Determine the nourishment time of a particle injection
+
+    Function will measure the average length of time particles spend in each
+    area of the domain for a given seed location, as indicated by the history
+    of particle travel times in walk_data. Returns a heatmap raster, in
+    which values indicate the average length of time particles tend to remain
+    in each cell (after spatial filtering to reduce noise in the random walk).
+
+    **Inputs** :
+
+        walk_data : `dict`
+            Dictionary of all prior x locations, y locations, and travel
+            times (the output of run_iteration)
+
+        raster_size : `tuple`
+            Tuple (L,W) of the domain dimensions, i.e. the output of
+            numpy.shape(raster).
+
+        sigma : `float`, optional
+            Degree of spatial smoothing of the area, implemented using a
+            Gaussian kernal of the same sigma, via scipy.ndimage.gaussian_filter
+            Default is light smoothing with sigma = 0.7 (to turn off smoothing,
+            set sigma = 0)
+
+        clip : `float`, optional
+            Percentile at which to truncate the distribution. Particles which
+            get stuck can lead to errors at the high-extreme, so this 
+            parameter is used to normalize by a "near-max". Default is the
+            99.5th percentile. To use true max, specify clip = 100
+
+    **Outputs** :
+
+        mean_time : `numpy.ndarray`
+            Array of mean occupation time, with cell values representing the 
+            mean time particles spent in that cell. If sigma > 0, the array
+            values include spatial filtering
+
+    """
+    if sigma > 0:
+        from scipy.ndimage import gaussian_filter
+
+    # Measure visit frequency
+    visit_freq = np.zeros(raster_size)
+    time_total = visit_freq.copy()
+    for ii in list(range(len(walk_data['xinds']))):
+        for jj in list(range(1, len(walk_data['xinds'][ii])-1)):
+            # Running total of particles in this cell to find average
+            visit_freq[walk_data['xinds'][ii][jj],
+                       walk_data['yinds'][ii][jj]] += 1
+            # Count the time in this cell as 0.5*last_dt + 0.5*next_dt
+            last_dt = (walk_data['travel_times'][ii][jj] - \
+                       walk_data['travel_times'][ii][jj-1])
+            next_dt = (walk_data['travel_times'][ii][jj+1] - \
+                       walk_data['travel_times'][ii][jj])
+            time_total[walk_data['xinds'][ii][jj],
+                       walk_data['yinds'][ii][jj]] += 0.5*(last_dt + next_dt)
+    # Find mean time in each cell
+    with np.errstate(divide='ignore', invalid='ignore'):
+        mean_time = time_total / visit_freq
+    mean_time[visit_freq == 0] = 0
+    # Prone to numerical outliers, so clip out extremes
+    vmax = float(np.nanpercentile(mean_time, clip))
+    mean_time = np.clip(mean_time, 0, vmax)
+
+    # If applicable, do smoothing
+    if sigma > 0:
+        mean_time = gaussian_filter(mean_time, sigma=sigma)
+        mean_time[mean_time==np.min(mean_time)] = np.nan
+    else:
+        mean_time[mean_time==0] = np.nan
+
+    return mean_time
+
+
 def unstruct2grid(coordinates,
                   quantity,
                   cellsize,
-                  k_nearest_neighbors=3):
+                  k_nearest_neighbors=3,
+                  boundary=None,
+                  crop=True):
     """Convert unstructured model outputs into gridded arrays.
 
     Interpolates model variables (e.g. depth, velocity) from an
@@ -929,10 +1070,19 @@ def unstruct2grid(coordinates,
         cellsize : `float or int`
             Length along one square cell face.
 
-        k_nearest_neighbors : `int`
+        k_nearest_neighbors : `int`, optional
             Number of nearest neighbors to use in the interpolation.
             If k>1, inverse-distance-weighted interpolation is used.
 
+        boundary : `list`, optional
+            List [] of (x,y) coordinates used to delineate the boundary of
+            interpolation. Points outside the polygon will be assigned as nan.
+            Format needs to match requirements of matplotlib.path.Path()
+
+        crop : `bool`, optional
+            If a boundary is specified, setting crop to True will eliminate
+            any all-NaN borders from the interpolated rasters.
+
     **Outputs** :
 
         interp_func : `function`
@@ -947,6 +1097,10 @@ def unstruct2grid(coordinates,
             Array of quantity after interpolation.
 
     """
+    import matplotlib
+    import scipy
+    from scipy import interpolate
+
     cellsize = float(cellsize)
 
     # Make sure all input values are floats
@@ -969,6 +1123,11 @@ def unstruct2grid(coordinates,
     gridXY_array = np.array([np.concatenate(gridX),
                              np.concatenate(gridY)]).transpose()
     gridXY_array = np.ascontiguousarray(gridXY_array)
+
+    # If a boundary has been specified, create array to index outside it
+    if boundary is not None:
+        path = matplotlib.path.Path(boundary)
+        outside = ~path.contains_points(gridXY_array)
 
     # Create Interpolation function
     if k_nearest_neighbors == 1:  # Only use nearest neighbor
@@ -980,9 +1139,15 @@ def unstruct2grid(coordinates,
         def interp_func(data):
             if isinstance(data, list):
                 data = np.array(data)
-            gridded_data = data[gridqInd]
+            gridded_data = data[gridqInd].astype(float)
+            if boundary is not None:
+                gridded_data[outside] = np.nan # Crop to bounds
             gridded_data.shape = (len(yvect), len(xvect))
             gridded_data = np.flipud(gridded_data)
+            if boundary is not None and crop is True:
+                mask = ~np.isnan(gridded_data) # Delete all-nan border
+                gridded_data = gridded_data[np.ix_(mask.any(1),
+                                                   mask.any(0))]
             return gridded_data
     else:
         # Inverse-distance interpolation
@@ -999,10 +1164,16 @@ def interp_func(data):
             num = 0.
             for i in list(range(k_nearest_neighbors)):
                 denom += nn_wts[:, i]
-                num += data[nn_inds[:, i]]*nn_wts[:, i]
+                num += data[nn_inds[:, i]].astype(float)*nn_wts[:, i]
             gridded_data = (num/denom)
+            if boundary is not None:
+                gridded_data[outside] = np.nan # Crop to bounds
             gridded_data.shape = (len(yvect), len(xvect))
             gridded_data = np.flipud(gridded_data)
+            if boundary is not None and crop is True:
+                mask = ~np.isnan(gridded_data) # Delete all-nan border
+                gridded_data = gridded_data[np.ix_(mask.any(1),
+                                                   mask.any(0))]
             return gridded_data
 
     # Finally, call the interpolation function to create array: