segment_extinction
In [1]:
%pylab inline
Populating the interactive namespace from numpy and matplotlib
In [2]:
%load_ext autoreload
%autoreload 2
In [3]:
import hsrl_utils.io
In [4]:
import netCDF4
In [5]:
import hsrl_utils.plot
In [6]:
import skimage.segmentation

Read obs

In [7]:
nc = netCDF4.Dataset('/data/cphillips/DOE_TEST_CASES/ahsrl_run13/out/ahsrl_20160504T0000_20160505T0000.nc')
ba = hsrl_utils.io.read_ba(nc)
depol = hsrl_utils.io.read_depol(nc)
color = hsrl_utils.io.read_color_ratio(nc)
od = hsrl_utils.io.read_od(nc)
qc = hsrl_utils.io.read_qc(nc)
nc.close()
/home/cphillips/hsrl_utils/hsrl_utils/io.py:50: RuntimeWarning: invalid value encountered in greater
  ba = nc.variables[k][:]
In [8]:
# Correct backscatter because the column sum is negative nearly everywhere
plot(np.nansum(ba.values.T, axis=0))
ba = ba.clip(0,None)
plot(np.nansum(ba.T, axis=0))
Out[8]:
[<matplotlib.lines.Line2D at 0x7fd859706cf8>]

Here is an idea of what they look like

OD (from surface)

In [9]:
_ = hsrl_utils.plot.plot_plain_image(od.T,None,None)

$\beta_a$ (log10)

In [10]:
_ = hsrl_utils.plot.plot_plain_image(np.log10(ba.T),-8,-1)
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:1: RuntimeWarning: divide by zero encountered in log10
  """Entry point for launching an IPython kernel.

Depol

In [11]:
_ = hsrl_utils.plot.plot_plain_image(depol.T,0,.6)

Color ratio

In [12]:
_ = hsrl_utils.plot.plot_plain_image(color.T,0,2)

Diseased

In [13]:
rgb = hsrl_utils.plot._get_diseased_rgb(ba,depol,color,qc)
_ = hsrl_utils.plot.plot_plain_image(rgb,None,None)
/home/cphillips/hsrl_utils/hsrl_utils/plot.py:48: RuntimeWarning: divide by zero encountered in log10
  return ((np.log10(ba) + 8) / (8-3)).clip(0,1)
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/skimage/color/colorconv.py:983: RuntimeWarning: invalid value encountered in less
  if np.any(z < 0):
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/skimage/color/colorconv.py:984: RuntimeWarning: invalid value encountered in less
  invalid = np.nonzero(z < 0)
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/skimage/color/colorconv.py:985: UserWarning: Color data out of range: Z < 0 in 1022 pixels
  warn('Color data out of range: Z < 0 in %s pixels' % invalid[0].size)
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/skimage/color/colorconv.py:990: RuntimeWarning: invalid value encountered in greater
  mask = out > 0.2068966
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/skimage/color/colorconv.py:633: RuntimeWarning: invalid value encountered in greater
  mask = arr > 0.0031308
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/skimage/color/colorconv.py:636: RuntimeWarning: invalid value encountered in less
  arr[arr < 0] = 0
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/skimage/color/colorconv.py:637: RuntimeWarning: invalid value encountered in greater
  arr[arr > 1] = 1

Segmentation

In [14]:
# Fill missing values
rgb2 = rgb.data.copy()
rgb2[np.isnan(rgb)] = 0
In [15]:
# Use k-means segmentation on diseased image (above)
# Find 1000 segments
seg = skimage.segmentation.slic(rgb2,n_segments=1000)

Here is what the segments look like

It should be as oversegmented as possible without losing benefit of averaging statistics

In [16]:
import skimage.transform
In [17]:
_ = hsrl_utils.plot.plot_plain_image(
    skimage.transform.rescale(
        skimage.segmentation.mark_boundaries(rgb2, seg),
        (2,2),
    mode='constant'),None,None
)

Process segments to get bulk lidar ratios

The basic process for each segment is

  1. Get the column-wise optical depth by subtracting the optical depth at the bottom from the top
  2. Find the mean backscatter within the segment
In [18]:
%%time

M_PER_PIXEL = 120

# Allocate output
ext = np.zeros_like(seg, dtype=np.float64)
sca = np.zeros_like(seg, dtype=np.float64)

# For each superpixel segment
for i in set(seg.flatten()):
    # Compute a segment mask
    subseg = (seg == i)*1
    # Take the vertical derivative of the mask
    # This will be -1 for top borders, +1 for bottom borders
    delta_mask = np.diff(np.pad(subseg, pad_width=((1,1),(0,0)), mode='constant'), axis=0)
    
    # We take the dot product with optical depth to get (+1 * od_bottom) + (-1 * od_top)
    # negate to get od_top - od_bottom or segment column optical thickness
    od_pad = np.pad(od.T.values, pad_width=((0,2),(0,0)), mode='edge')[1:,:]
    od_above = (delta_mask < 0) * od_pad
    od_below = (delta_mask > 0) * od_pad
    od_col = np.nansum(od_above - od_below, axis=0)
    
    # We find the columnwise mean of beta_a too
    scatter_col = np.nanmean((subseg*ba.T.values)[subseg==1],axis=0)
    
    # Compute the columnwise height weights of the segment
    col_weight = subseg.sum(axis=0,dtype=np.float64)
    col_weight /= col_weight.sum()
    
    # Find the column height-weighted mean values for od
    # beta_e = tau / dx
    beta_e = od_col / (subseg.sum(axis=0,dtype=np.float64) * M_PER_PIXEL)
    mean_ext = np.nansum(beta_e * col_weight)
    
    # Find the column height-weighted mean values for backscatter
    mean_sca = np.nansum(scatter_col * col_weight)
    
    # Write
    ext[subseg==1] = mean_ext
    sca[subseg==1] = mean_sca
    #if i == 230: break
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:24: RuntimeWarning: Mean of empty slice
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:32: RuntimeWarning: invalid value encountered in true_divide
CPU times: user 917 ms, sys: 0 ns, total: 917 ms
Wall time: 916 ms

Results

First test that if we integrate the processed bulk extinction we get back the total

In [19]:
plot(od.T.values[-1] - od.T.values[1], label='Total Optical Depth')
plot((ext).sum(axis=0) * M_PER_PIXEL, label='Integrated Segmented Extinction')
title('Optical Depth Closure')
legend(loc='best')
xlabel('Time (2016-05-04 00:00 + i*5 min)')
_ = ylabel('Optical Depth')

Check on the bulk backscatter and extinction

In [20]:
## BACKSCATTER
_ = hsrl_utils.plot.plot_plain_image(np.log10(sca), -8, -3)
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:2: RuntimeWarning: divide by zero encountered in log10
  
In [21]:
## EXTINCTION
_ = hsrl_utils.plot.plot_plain_image(np.log10(ext), -6, -1)
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:2: RuntimeWarning: divide by zero encountered in log10
  
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in log10
  

Finally, the lidar ratio

In [22]:
figure(figsize=(8,4))
imshow(ext/sca, vmin=0, vmax=180, origin='lower')
colorbar()
gca().get_xaxis().set_visible(False)
gca().get_yaxis().set_visible(False)
_ = title('Bulk Lidar Ratio')
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:2: RuntimeWarning: divide by zero encountered in true_divide
  
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in true_divide
  

Compared to taking a simple finite difference for extinction

In [23]:
figure(figsize=(8,4))
_ext = np.diff(od.T,axis=0)
_ba = ba.T.values[1:,:]
imshow( _ext / _ba / M_PER_PIXEL, vmin=0,vmax=180,origin='lower')
gca().get_xaxis().set_visible(False)
gca().get_yaxis().set_visible(False)
title('Lidar ratio from finite differences')
_ = colorbar()
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:4: RuntimeWarning: divide by zero encountered in true_divide
  after removing the cwd from sys.path.

Validate with AERONET

In [24]:
import pandas as pd
In [25]:
df = pd.read_csv('/data/cphillips/aeronet/20160401_20170630_Seoul_SNU.lev15.csv',index_col=0)
df.index = pd.DatetimeIndex(df.index)
In [26]:
AOD_532nm = df.apply(lambda i: np.interp([532],[500,675],[i.AOD_500nm,i.AOD_675nm])[0], axis=1)
In [28]:
AOD_532nm.loc['2016-05-04':'2016-05-04 12:00'].plot(label='AERONET')
pd.Series((ext.sum(axis=0)*M_PER_PIXEL),
          index=ba.index).loc['2016-05-04':'2016-05-04 12:00'].plot(label='Summed HSRL bulk extinction')
title('AOD (AERONET vs HSRL)')
ylim([0,ylim()[1]])
ylabel('AOD')
xlabel('Time')
_ = legend(loc='best')

So it's way off