Implement support for NCStream and CDMRemote.

joelrahman · May 21, 2015 · bb55d99 · bb55d99
1 parent ebe0a4a
commit bb55d99
Show file tree

Hide file tree

Showing 32 changed files with 59,244 additions and 7 deletions.
diff --git a/.coveragerc b/.coveragerc
@@ -1,2 +1,2 @@
 [run]
-omit = siphon/_version.py
+omit = siphon/_version.py,siphon/cdmr/ncStream_pb2.py
diff --git a/.gitignore b/.gitignore
@@ -33,6 +33,7 @@ develop-eggs
 lib
 lib64
 __pycache__
+.ipynb_checkpoints/
 
 # Installer logs
 pip-log.txt

diff --git a/.prospector.yaml b/.prospector.yaml
@@ -3,6 +3,7 @@ ignore-paths:
   - build
   - versioneer.py
   - siphon/_version.py
+  - siphon/cdmr/ncStream_pb2.py
   - examples
 
 pylint:

diff --git a/TODO.md b/TODO.md
@@ -0,0 +1,16 @@
+Things to get done before next release:
+- [ ] TDSCatalog and co. design review
+- [x] Refactor CDMR support
+  - [x] CDMR web-api support should be stand-alone
+  - [x] NCStream parsing -- as complete as currently justified; extend only when needed
+- [ ] Complete implementation of spec
+  - [x] unsigned handling
+  - [x] compression
+  - [ ] structure
+  - [ ] seq
+  - [x] opaque ?
+  - [x] enums
+- [x] Profile and optimize
+- [x] Benchmark against opendap and local netcdf
+- [x] Enable Travis
+- [ ] Comment and document code (once API is more stable)
diff --git a/examples/notebooks/cdmr/Basic CDMRemote Demo.ipynb b/examples/notebooks/cdmr/Basic CDMRemote Demo.ipynb
diff --git a/examples/notebooks/cdmr/Benchmarks.ipynb b/examples/notebooks/cdmr/Benchmarks.ipynb
@@ -0,0 +1,235 @@
+{
+ "metadata": {
+  "name": "",
+  "signature": "sha256:ebe3df16b29aaadc28ce95c8c4cdcc92aac907b71c90f41a106dad2d95e7ccc1"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import netCDF4\n",
+      "from pyudl.cdmr import Dataset\n",
+      "from pyudl.tds import TDSCatalog\n",
+      "\n",
+      "tstvar = \"Geopotential_height_isobaric\""
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 34
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "cat = TDSCatalog('http://localhost:8080/thredds/catalog.xml?dataset=HRRR_nc3')\n",
+      "url = cat.datasets.values()[0].accessUrls['CdmRemote']"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 55
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%%timeit -n 1 -r 1\n",
+      "ds = Dataset(url)\n",
+      "#lcc_info = ds.variables['LambertConformal_Projection']\n",
+      "temp = ds.variables[tstvar]\n",
+      "#x = ds.variables['x'][:] * 1000.\n",
+      "#y = ds.variables['y'][:] * 1000.\n",
+      "tempData = temp[0, :]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "1 loops, best of 1: 4.88 s per loop\n"
+       ]
+      }
+     ],
+     "prompt_number": 56
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%%timeit -n 1 -r 1\n",
+      "ds = netCDF4.Dataset(cat.datasets.values()[0].accessUrls['OPENDAP'])\n",
+      "#lcc_info = ds.variables['LambertConformal_Projection']\n",
+      "temp = ds.variables[tstvar]\n",
+      "#x = ds.variables['x'][:] * 1000.\n",
+      "#y = ds.variables['y'][:] * 1000.\n",
+      "tempData = temp[1, :]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "1 loops, best of 1: 8.9 s per loop\n"
+       ]
+      }
+     ],
+     "prompt_number": 57
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#%timeit -n 1 -r 1 ds = Dataset(url)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 58
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#%timeit -n 1 -r 1 ds = netCDF4.Dataset(cat.datasets.values()[0].accessUrls['OPENDAP'])"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 59
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%%timeit -n 1 -r 1\n",
+      "ds = netCDF4.Dataset(\"/Users/lesserwhirls/.unidata/tds/content/thredds/public/testdata/HRRR.nc\")\n",
+      "#lcc_info = ds.variables['LambertConformal_Projection']\n",
+      "temp = ds.variables[tstvar]\n",
+      "#x = ds.variables['x'][:] * 1000.\n",
+      "#y = ds.variables['y'][:] * 1000.\n",
+      "tempData = temp[2, :]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "1 loops, best of 1: 922 ms per loop\n"
+       ]
+      }
+     ],
+     "prompt_number": 60
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "<BR>\n",
+      "# netCDF-4\n",
+      "<BR>"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "cat = TDSCatalog('http://localhost:8080/thredds/catalog.xml?dataset=HRRR_nc4')\n",
+      "url = cat.datasets.values()[0].accessUrls['CdmRemote']"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 61
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%%timeit -n 1 -r 1\n",
+      "ds = Dataset(url)\n",
+      "#lcc_info = ds.variables['LambertConformal_Projection']\n",
+      "temp = ds.variables[tstvar]\n",
+      "#x = ds.variables['x'][:] * 1000.\n",
+      "#y = ds.variables['y'][:] * 1000.\n",
+      "tempData = temp[0, :]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "1 loops, best of 1: 7.39 s per loop\n"
+       ]
+      }
+     ],
+     "prompt_number": 62
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%%timeit -n 1 -r 1\n",
+      "ds = netCDF4.Dataset(cat.datasets.values()[0].accessUrls['OPENDAP'])\n",
+      "#lcc_info = ds.variables['LambertConformal_Projection']\n",
+      "temp = ds.variables[tstvar]\n",
+      "#x = ds.variables['x'][:] * 1000.\n",
+      "#y = ds.variables['y'][:] * 1000.\n",
+      "tempData = temp[1, :]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "1 loops, best of 1: 10.8 s per loop\n"
+       ]
+      }
+     ],
+     "prompt_number": 63
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%%timeit -n 1 -r 1\n",
+      "ds = netCDF4.Dataset(\"/Users/lesserwhirls/.unidata/tds/content/thredds/public/testdata/HRRR.nc4\")\n",
+      "#lcc_info = ds.variables['LambertConformal_Projection']\n",
+      "temp = ds.variables[tstvar]\n",
+      "#x = ds.variables['x'][:] * 1000.\n",
+      "#y = ds.variables['y'][:] * 1000.\n",
+      "tempData = temp[2, :]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "1 loops, best of 1: 3.28 s per loop\n"
+       ]
+      }
+     ],
+     "prompt_number": 64
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
diff --git a/setup.cfg b/setup.cfg
@@ -1,5 +1,6 @@
 [flake8]
 max-line-length = 90
+exclude = siphon/cdmr/ncStream_pb2.py
 
 [metadata]
 description-file = README.rst

diff --git a/siphon/__init__.py b/siphon/__init__.py
@@ -1,7 +1,8 @@
 # use __init__.py to setup the namespace
 from . import tds
+from . import cdmr
 from ._version import get_versions
 __version__ = get_versions()['version']
 del get_versions
 
-__all__ = ['tds']
+__all__ = ['tds', 'cdmr']
diff --git a/siphon/cdmr/__init__.py b/siphon/cdmr/__init__.py
@@ -0,0 +1,3 @@
+from .dataset import Dataset
+
+__all__ = ['Dataset']
diff --git a/siphon/cdmr/cdmremote.py b/siphon/cdmr/cdmremote.py
@@ -0,0 +1,75 @@
+try:
+    from urllib2 import build_opener, HTTPError
+except ImportError:
+    from urllib.request import build_opener, HTTPError
+
+from .ncstream import read_ncstream_messages
+from .._version import get_versions
+__version__ = get_versions()['version']
+
+
+class CDMRemote(object):
+    """Issue requests against a CDMRemote endpoint and hand back the parsed responses.
+
+    Every ``fetch_*`` method builds a query URL from the base ``url``, opens it with
+    the shared ``opener`` and passes the opened response to ``responseHandler``.
+    """
+
+    # Create a custom url opener to add a user agent
+    opener = build_opener()
+    opener.addheaders = [('User-agent', 'Siphon v%s CDMRemote Reader' % __version__)]
+
+    def __init__(self, url):
+        """Store the base ``url`` and install the default response handler."""
+        self.url = url
+        self.responseHandler = read_ncstream_messages
+
+    def _fetch(self, url):
+        """Open ``url`` and return whatever ``responseHandler`` makes of the response.
+
+        On ``HTTPError`` the failing URL is printed and the error is re-raised.
+        """
+        try:
+            return self.responseHandler(self.opener.open(url))
+        except HTTPError:
+            print('Error accessing: ' + url)
+            raise
+
+    def fetch_capabilities(self):
+        """Fetch the result of a ``req=capabilities`` query."""
+        url = self.query_url(req='capabilities')
+        return self._fetch(url)
+
+    def fetch_cdl(self):
+        """Fetch the result of a ``req=CDL`` query."""
+        url = self.query_url(req='CDL')
+        return self._fetch(url)
+
+    def fetch_data(self, **var):
+        """Fetch the result of a ``req=data`` query.
+
+        Each keyword argument maps a variable name to a sequence of indices (slices
+        and/or plain indices). All of them are combined, comma separated, into the
+        single ``var`` query parameter.
+        """
+        varstr = ','.join(name + self._convert_indices(ind)
+                          for name, ind in var.items())
+        url = self.query_url(req='data', var=varstr)
+        return self._fetch(url)
+
+    def fetch_header(self):
+        """Fetch the result of a ``req=header`` query."""
+        url = self.query_url(req='header')
+        return self._fetch(url)
+
+    def fetch_ncml(self):
+        """Fetch the result of a ``req=NcML`` query."""
+        url = self.query_url(req='NcML')
+        return self._fetch(url)
+
+    def query_url(self, **kw):
+        """Return the base URL plus ``?`` and the keyword arguments as ``key=value``.
+
+        Pairs are joined with ``&``; values are inserted as-is (no URL escaping).
+        """
+        query = '&'.join('%s=%s' % i for i in kw.items())
+        return '?'.join((self.url, query))
+
+    @staticmethod
+    def _convert_indices(ind):
+        """Convert a sequence of indices into a parenthesized section string.
+
+        ``ind`` is an iterable of slices and/or plain indices. The result looks like
+        ``'(0,1:4:2)'``, or is ``''`` when every entry is a full slice (``:``), in
+        which case no subsetting is requested.
+
+        Raises ``TypeError`` if a partial slice has no ``stop``: the dimension size
+        is not known here, so an open-ended range cannot be written out.
+        """
+        reqs = []
+        subset = False
+        for i in ind:
+            if isinstance(i, slice):
+                if i.start is None and i.stop is None and i.step is None:
+                    reqs.append(':')
+                else:
+                    subset = True
+                    if i.stop is None:
+                        raise TypeError('slice stop must be given explicitly, got %r'
+                                        % (i,))
+
+                    # A missing start means "from the first element"
+                    start = 0 if i.start is None else i.start
+
+                    # Adjust for CDMRemote's inclusive range end
+                    slice_str = str(start) + ':' + str(i.stop - 1)
+
+                    # Add step if necessary
+                    if i.step:
+                        slice_str += ':' + str(i.step)
+
+                    reqs.append(slice_str)
+            else:
+                reqs.append(str(i))
+                subset = True
+
+        return '(' + ','.join(reqs) + ')' if subset else ''
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from .dataset import Dataset

		__all__ = ['Dataset']