%pylab inline

Populating the interactive namespace from numpy and matplotlib

%load_ext autoreload
%autoreload 2

import hsrl_utils.io

import netCDF4

import hsrl_utils.plot

import skimage.segmentation

Read obs¶

nc = netCDF4.Dataset('/data/cphillips/DOE_TEST_CASES/ahsrl_run13/out/ahsrl_20160504T0000_20160505T0000.nc')
ba = hsrl_utils.io.read_ba(nc)
depol = hsrl_utils.io.read_depol(nc)
color = hsrl_utils.io.read_color_ratio(nc)
od = hsrl_utils.io.read_od(nc)
qc = hsrl_utils.io.read_qc(nc)
nc.close()

/home/cphillips/hsrl_utils/hsrl_utils/io.py:50: RuntimeWarning: invalid value encountered in greater
  ba = nc.variables[k][:]

# Correct backscatter because the column sum is negative nearly everywhere
plot(np.nansum(ba.values.T, axis=0))
ba = ba.clip(0,None)
plot(np.nansum(ba.T, axis=0))

[<matplotlib.lines.Line2D at 0x7fd859706cf8>]

Here is an idea of what they look like¶

OD (from surface)¶

_ = hsrl_utils.plot.plot_plain_image(od.T,None,None)

$\beta_a$ (log10)¶

_ = hsrl_utils.plot.plot_plain_image(np.log10(ba.T),-8,-1)

/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:1: RuntimeWarning: divide by zero encountered in log10
  """Entry point for launching an IPython kernel.

Depol¶

_ = hsrl_utils.plot.plot_plain_image(depol.T,0,.6)

Color ratio¶

_ = hsrl_utils.plot.plot_plain_image(color.T,0,2)

Diseased¶

rgb = hsrl_utils.plot._get_diseased_rgb(ba,depol,color,qc)
_ = hsrl_utils.plot.plot_plain_image(rgb,None,None)

/home/cphillips/hsrl_utils/hsrl_utils/plot.py:48: RuntimeWarning: divide by zero encountered in log10
  return ((np.log10(ba) + 8) / (8-3)).clip(0,1)
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/skimage/color/colorconv.py:983: RuntimeWarning: invalid value encountered in less
  if np.any(z < 0):
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/skimage/color/colorconv.py:984: RuntimeWarning: invalid value encountered in less
  invalid = np.nonzero(z < 0)
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/skimage/color/colorconv.py:985: UserWarning: Color data out of range: Z < 0 in 1022 pixels
  warn('Color data out of range: Z < 0 in %s pixels' % invalid[0].size)
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/skimage/color/colorconv.py:990: RuntimeWarning: invalid value encountered in greater
  mask = out > 0.2068966
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/skimage/color/colorconv.py:633: RuntimeWarning: invalid value encountered in greater
  mask = arr > 0.0031308
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/skimage/color/colorconv.py:636: RuntimeWarning: invalid value encountered in less
  arr[arr < 0] = 0
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/skimage/color/colorconv.py:637: RuntimeWarning: invalid value encountered in greater
  arr[arr > 1] = 1

Segmentation¶

# Fill missing values
rgb2 = rgb.data.copy()
rgb2[np.isnan(rgb)] = 0

# Use k-means segmentation on diseased image (above)
# Find 1000 segments
seg = skimage.segmentation.slic(rgb2,n_segments=1000)

Here is what the segments look like¶

It should be as oversegmented as possible without losing benefit of averaging statistics

import skimage.transform

_ = hsrl_utils.plot.plot_plain_image(
    skimage.transform.rescale(
        skimage.segmentation.mark_boundaries(rgb2, seg),
        (2,2),
    mode='constant'),None,None
)

Process segments to get bulk lidar ratios¶

The basic process for each segment is

Get the column-wise optical depth by subtracting the optical depth at the bottom from the top
Find the mean backscatter within the segment

%%time

M_PER_PIXEL = 120

# Allocate output
ext = np.zeros_like(seg, dtype=np.float64)
sca = np.zeros_like(seg, dtype=np.float64)

# For each superpixel segment
for i in set(seg.flatten()):
    # Compute a segment mask
    subseg = (seg == i)*1
    # Take the vertical derivative of the mask
    # This will be -1 for top borders, +1 for bottom borders
    delta_mask = np.diff(np.pad(subseg, pad_width=((1,1),(0,0)), mode='constant'), axis=0)
    
    # We take the dot product with optical depth to get (+1 * od_bottom) + (-1 * od_top)
    # negate to get od_top - od_bottom or segment column optical thickness
    od_pad = np.pad(od.T.values, pad_width=((0,2),(0,0)), mode='edge')[1:,:]
    od_above = (delta_mask < 0) * od_pad
    od_below = (delta_mask > 0) * od_pad
    od_col = np.nansum(od_above - od_below, axis=0)
    
    # We find the columnwise mean of beta_a too
    scatter_col = np.nanmean((subseg*ba.T.values)[subseg==1],axis=0)
    
    # Compute the columnwise height weights of the segment
    col_weight = subseg.sum(axis=0,dtype=np.float64)
    col_weight /= col_weight.sum()
    
    # Find the column height-weighted mean values for od
    # beta_e = tau / dx
    beta_e = od_col / (subseg.sum(axis=0,dtype=np.float64) * M_PER_PIXEL)
    mean_ext = np.nansum(beta_e * col_weight)
    
    # Find the column height-weighted mean values for backscatter
    mean_sca = np.nansum(scatter_col * col_weight)
    
    # Write
    ext[subseg==1] = mean_ext
    sca[subseg==1] = mean_sca
    #if i == 230: break

/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:24: RuntimeWarning: Mean of empty slice
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:32: RuntimeWarning: invalid value encountered in true_divide

CPU times: user 917 ms, sys: 0 ns, total: 917 ms
Wall time: 916 ms

Results¶

First test that if we integrate the processed bulk extinction we get back the total¶

plot(od.T.values[-1] - od.T.values[1], label='Total Optical Depth')
plot((ext).sum(axis=0) * M_PER_PIXEL, label='Integrated Segmented Extinction')
title('Optical Depth Closure')
legend(loc='best')
xlabel('Time (2016-05-04 00:00 + i*5 min)')
_ = ylabel('Optical Depth')

Check on the bulk backscatter and extinction¶

## BACKSCATTER
_ = hsrl_utils.plot.plot_plain_image(np.log10(sca), -8, -3)

/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:2: RuntimeWarning: divide by zero encountered in log10

## EXTINCTION
_ = hsrl_utils.plot.plot_plain_image(np.log10(ext), -6, -1)

/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:2: RuntimeWarning: divide by zero encountered in log10
  
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in log10

Finally, the lidar ratio¶

figure(figsize=(8,4))
imshow(ext/sca, vmin=0, vmax=180, origin='lower')
colorbar()
gca().get_xaxis().set_visible(False)
gca().get_yaxis().set_visible(False)
_ = title('Bulk Lidar Ratio')

/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:2: RuntimeWarning: divide by zero encountered in true_divide
  
/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in true_divide

Compared to taking a simple finite difference for extinction¶

figure(figsize=(8,4))
_ext = np.diff(od.T,axis=0)
_ba = ba.T.values[1:,:]
imshow( _ext / _ba / M_PER_PIXEL, vmin=0,vmax=180,origin='lower')
gca().get_xaxis().set_visible(False)
gca().get_yaxis().set_visible(False)
title('Lidar ratio from finite differences')
_ = colorbar()

/data/cphillips/anaconda2/envs/dsb/lib/python3.6/site-packages/ipykernel_launcher.py:4: RuntimeWarning: divide by zero encountered in true_divide
  after removing the cwd from sys.path.

Validate with AERONET¶

import pandas as pd

df = pd.read_csv('/data/cphillips/aeronet/20160401_20170630_Seoul_SNU.lev15.csv',index_col=0)
df.index = pd.DatetimeIndex(df.index)

AOD_532nm = df.apply(lambda i: np.interp([532],[500,675],[i.AOD_500nm,i.AOD_675nm])[0], axis=1)

AOD_532nm.loc['2016-05-04':'2016-05-04 12:00'].plot(label='AERONET')
pd.Series((ext.sum(axis=0)*M_PER_PIXEL),
          index=ba.index).loc['2016-05-04':'2016-05-04 12:00'].plot(label='Summed HSRL bulk extinction')
title('AOD (AERONET vs HSRL)')
ylim([0,ylim()[1]])
ylabel('AOD')
xlabel('Time')
_ = legend(loc='best')

So it's way off