Merge branch 'CLIMATE-938' of https://github.com/MichaelArthurAnderson/climate
diff --git a/RCMES/CORDEX/cordex.py b/RCMES/CORDEX/cordex.py
index 24ce0c6..4b4a4e8 100644
--- a/RCMES/CORDEX/cordex.py
+++ b/RCMES/CORDEX/cordex.py
@@ -1,57 +1,62 @@
-import os
-import sys
-import subprocess
-import jinja2
-from metadata_extractor import CORDEXMetadataExtractor, obs4MIPSMetadataExtractor
-
-# These should be modified. TODO: domains can also be made into separate group
-# CORDEX domain
-domain = 'NAM-44'
-
-# The output directory
-workdir = '/home/goodman/data_processing/CORDEX/analysis'
-
-# Location of osb4Mips files
-obs_dir = '/proj3/data/obs4mips'
-
-# Location of CORDEX files
-models_dir = '/proj3/data/CORDEX/{domain}/*'.format(domain=domain)
-
-# Extract metadata from model and obs files, pairing up files with the same
-# variables for separate evaluations
-obs_extractor = obs4MIPSMetadataExtractor(obs_dir)
-models_extractor = CORDEXMetadataExtractor(models_dir)
-groups = obs_extractor.group(models_extractor, 'variable')
-
-# Configuration file template, to be rendered repeatedly for each evaluation
-# run
-env = jinja2.Environment(loader=jinja2.FileSystemLoader('./templates'),
- trim_blocks=True, lstrip_blocks=True)
-t = env.get_template('CORDEX.yaml.template')
-
-# Each group represents a single evaluation. Repeat the evaluation for
-# three seasons: Summer, Winter, and Annual.
-seasons = ['annual', 'winter', 'summer']
-for group in groups:
- obs_info, models_info = group
- instrument = obs_info['instrument']
- variable = obs_info['variable']
- for season in seasons:
- configfile_basename = '_'.join([domain, instrument, variable, season]) + '.yaml'
+import os
+import sys
+import subprocess
+import jinja2
+from metadata_extractor import CORDEXMetadataExtractor, obs4MIPSMetadataExtractor
+
+# These defaults are used when the four command-line arguments are not supplied; modify them as needed.
+# TODO: domains can also be made into a separate group
+# CORDEX domain
+
+user_input = sys.argv[1:]
+if len(user_input) == 4:
+ domain, workdir, obs_dir, models_dir = user_input[:]
+else:
+ domain = 'NAM-44'
+
+ # The output directory
+    workdir = os.path.join(os.getcwd(), domain + '_analysis')
+
+    # Location of obs4MIPs files
+ obs_dir = '/proj3/data/obs4mips'
+
+ # Location of CORDEX files
+ models_dir = '/proj3/data/CORDEX/{domain}/*'.format(domain=domain)
+
+# Extract metadata from model and obs files, pairing up files with the same
+# variables for separate evaluations
+obs_extractor = obs4MIPSMetadataExtractor(obs_dir)
+models_extractor = CORDEXMetadataExtractor(models_dir)
+groups = obs_extractor.group(models_extractor, 'variable')
+
+# Configuration file template, to be rendered repeatedly for each evaluation
+# run
+env = jinja2.Environment(loader=jinja2.FileSystemLoader('./templates'),
+ trim_blocks=True, lstrip_blocks=True)
+t = env.get_template('CORDEX.yaml.template')
+
+# Each group represents a single evaluation. Repeat the evaluation for
+# three seasons: Summer, Winter, and Annual.
+seasons = ['annual', 'winter', 'summer']
+errored = []
+for group in groups:
+ obs_info, models_info = group
+ instrument = obs_info['instrument']
+ variable = obs_info['variable']
+ for season in seasons:
+ configfile_basename = '_'.join([domain, instrument, variable, season]) + '.yaml'
configfile_path = os.path.join(workdir, domain, instrument,
- variable, season)
- if not os.path.exists(configfile_path):
- os.makedirs(configfile_path)
- configfile_path = os.path.join(configfile_path, configfile_basename)
- with open(configfile_path, 'w') as configfile:
- configfile.write(t.render(obs_info=obs_info, models_info=models_info,
- season=season, output_dir=workdir))
-
- # TODO: Do this in parallel. Will change this once this approach
- # is well tested.
- code = subprocess.call([sys.executable, '../run_RCMES.py', configfile_path])
- errored = []
- if code:
- errored.append(configfile_path)
-
-print("All runs done. The following ended with an error: {}".format(errored))
+ variable, season)
+ if not os.path.exists(configfile_path):
+ os.makedirs(configfile_path)
+ configfile_path = os.path.join(configfile_path, configfile_basename)
+ with open(configfile_path, 'w') as configfile:
+ configfile.write(t.render(obs_info=obs_info, models_info=models_info,
+ season=season, output_dir=workdir))
+
+ # TODO: Do this in parallel. Will change this once this approach
+ # is well tested.
+ code = subprocess.call([sys.executable, '../run_RCMES.py', configfile_path])
+ if code:
+ errored.append(configfile_path)
+
+print("All runs done. The following ended with an error: {}".format(errored))
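With the argument handling above, the script can be driven either with its built-in defaults or with four positional arguments. A minimal invocation sketch, assuming it is run from RCMES/CORDEX/ and using placeholder paths:

    import subprocess
    import sys

    # Order matches sys.argv[1:] in cordex.py: domain, workdir, obs_dir, models_dir.
    subprocess.call([sys.executable, 'cordex.py', 'NAM-44',
                     '/path/to/analysis_workdir', '/path/to/obs4mips',
                     '/path/to/CORDEX/NAM-44/*'])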
diff --git a/examples/esgf_integration_example.py b/examples/esgf_integration_example.py
index e939927..e541273 100644
--- a/examples/esgf_integration_example.py
+++ b/examples/esgf_integration_example.py
@@ -30,36 +30,46 @@
"""
-import ocw.data_source.esgf as esgf
-from getpass import getpass
+from __future__ import print_function
+
import ssl
import sys
+from getpass import getpass
-if hasattr(ssl, '_create_unverified_context'):
- ssl._create_default_https_context = ssl._create_unverified_context
+import ocw.data_source.esgf as esgf
-dataset_id = 'obs4mips.CNES.AVISO.zos.mon.v20110829|esgf-data.jpl.nasa.gov'
-variable = 'zosStderr'
-if sys.version_info[0] >= 3:
- username = input('Enter your ESGF OpenID:\n')
-else:
- username = raw_input('Enter your ESGF OpenID:\n')
+def main():
+ """
+ An example of using the OCW ESGF library. Connects to an ESGF
+ server and downloads a dataset.
+ """
+ if hasattr(ssl, '_create_unverified_context'):
+ ssl._create_default_https_context = ssl._create_unverified_context
-password = getpass(prompt='Enter your ESGF Password:\n')
+ dataset_id = 'obs4mips.CNES.AVISO.zos.mon.v20110829|esgf-data.jpl.nasa.gov'
+ variable = 'zosStderr'
-# Multiple datasets are returned in a list if the ESGF dataset is
-# divided into multiple files.
-datasets = esgf.load_dataset(dataset_id,
- variable,
- username,
- password)
+ if sys.version_info[0] >= 3:
+ username = input('Enter your ESGF OpenID:\n')
+ else:
+ username = raw_input('Enter your ESGF OpenID:\n')
-# For this example, our dataset is only stored in a single file so
-# we only need to look at the 0-th value in the returned list.
-ds = datasets[0]
+ password = getpass(prompt='Enter your ESGF Password:\n')
-print('\n--------\n')
-print('Variable: ', ds.variable)
-print('Shape: ', ds.values.shape)
-print('A Value: ', ds.values[100][100][100])
+ # Multiple datasets are returned in a list if the ESGF dataset is
+ # divided into multiple files.
+ datasets = esgf.load_dataset(dataset_id, variable, username, password)
+
+ # For this example, our dataset is only stored in a single file so
+ # we only need to look at the 0-th value in the returned list.
+ dataset = datasets[0]
+
+ print('\n--------\n')
+ print('Variable: ', dataset.variable)
+ print('Shape: ', dataset.values.shape)
+ print('A Value: ', dataset.values[100][100][100])
+
+
+if __name__ == '__main__':
+ main()
diff --git a/examples/model_ensemble_to_rcmed.py b/examples/model_ensemble_to_rcmed.py
index 185aa2e..787367b 100644
--- a/examples/model_ensemble_to_rcmed.py
+++ b/examples/model_ensemble_to_rcmed.py
@@ -130,14 +130,10 @@
cru_start = datetime.datetime.strptime(cru_31['start_date'], "%Y-%m-%d")
cru_end = datetime.datetime.strptime(cru_31['end_date'], "%Y-%m-%d")
-knmi_start, knmi_end = knmi_dataset.temporal_boundaries()
# Set the Time Range to be the year 1989
start_time = datetime.datetime(1989, 1, 1)
end_time = datetime.datetime(1989, 12, 1)
-print("Time Range is: %s to %s" % (start_time.strftime("%Y-%m-%d"),
- end_time.strftime("%Y-%m-%d")))
-
print("Fetching data from RCMED...")
cru31_dataset = rcmed.parameter_dataset(dataset_id,
parameter_id,
@@ -150,15 +146,29 @@
""" Step 3: Resample Datasets so they are the same shape """
+# Running the temporal rebin early helps avoid mismatches caused by the datasets being
+# timestamped on different days of the month (1st vs. 15th).
print("Temporally Rebinning the Datasets to an Annual Timestep")
# To run annual temporal Rebinning,
knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='annual')
+dataset_start, dataset_end = knmi_dataset.temporal_boundaries()
+start_time = max([start_time, dataset_start])
+end_time = min([end_time, dataset_end])
+
wrf311_dataset = dsp.temporal_rebin(
wrf311_dataset, temporal_resolution='annual')
-cru31_dataset = dsp.temporal_rebin(cru31_dataset, temporal_resolution='annual')
+dataset_start, dataset_end = wrf311_dataset.temporal_boundaries()
+start_time = max([start_time, dataset_start])
+end_time = min([end_time, dataset_end])
-# Running Temporal Rebin early helps negate the issue of datasets being on different
-# days of the month (1st vs. 15th)
+cru31_dataset = dsp.temporal_rebin(cru31_dataset, temporal_resolution='annual')
+dataset_start, dataset_end = cru31_dataset.temporal_boundaries()
+start_time = max([start_time, dataset_start])
+end_time = min([end_time, dataset_end])
+
+print("Time Range is: %s to %s" % (start_time.strftime("%Y-%m-%d"),
+ end_time.strftime("%Y-%m-%d")))
+
# Create a Bounds object to use for subsetting
new_bounds = Bounds(lat_min=min_lat, lat_max=max_lat, lon_min=min_lon,
lon_max=max_lon, start=start_time, end=end_time)
@@ -214,7 +224,7 @@
lats = new_lats
lons = new_lons
fname = OUTPUT_PLOT
-gridshape = (3, 1) # Using a 3 x 1 since we have a 1 year of data for 3 models
+gridshape = (3, end_time.year - start_time.year + 1)  # Using a 3 x N grid since we have N year(s) of data for 3 models
plotnames = ["KNMI", "WRF311", "ENSEMBLE"]
for i in np.arange(3):
plot_title = "TASMAX Bias of CRU 3.1 vs. %s (%s - %s)" % (
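The repeated max/min clamping above intersects the requested time range with each dataset's temporal boundaries. A compact standalone sketch of the same idea, assuming each dataset exposes temporal_boundaries() as in ocw.dataset.Dataset:

    def intersect_time_range(datasets, start_time, end_time):
        # Shrink [start_time, end_time] to the period covered by every dataset.
        for dataset in datasets:
            dataset_start, dataset_end = dataset.temporal_boundaries()
            start_time = max(start_time, dataset_start)
            end_time = min(end_time, dataset_end)
        return start_time, end_time

    # e.g. start_time, end_time = intersect_time_range(
    #     [knmi_dataset, wrf311_dataset, cru31_dataset], start_time, end_time)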
diff --git a/ocw-ui/backend/processing.py b/ocw-ui/backend/processing.py
index 07375d8..6d8a1a1 100644
--- a/ocw-ui/backend/processing.py
+++ b/ocw-ui/backend/processing.py
@@ -216,8 +216,9 @@
in target_datasets]
# Do temporal re-bin based off of passed resolution
- ref_dataset = dsp.temporal_rebin(ref_dataset, time_delta)
- target_datasets = [dsp.temporal_rebin(ds, time_delta)
+ temporal_resolution_type = data['temporal_resolution_type']
+ ref_dataset = dsp.temporal_rebin(ref_dataset, temporal_resolution_type)
+ target_datasets = [dsp.temporal_rebin(ds, temporal_resolution_type)
for ds
in target_datasets]
diff --git a/ocw/data_source/esgf.py b/ocw/data_source/esgf.py
index 0dcc2e0..6b2f042 100644
--- a/ocw/data_source/esgf.py
+++ b/ocw/data_source/esgf.py
@@ -16,9 +16,26 @@
# specific language governing permissions and limitations
# under the License.
#
+"""
+A set of functions to wrap downloading ESGF datasets into an OCW dataset object.
+*** Note *** The ESGF data source requires that the user have certain credentials downloaded from
+ESGF. The current version of this module should download them automatically; older versions of
+the library will not. If needed, run the ESGF WGET script for a test dataset to obtain the
+credentials, after which the data source should work as expected.
+
+"""
import os
import sys
+
+import requests
+from bs4 import BeautifulSoup
+
+import ocw.data_source.local as local
+from ocw.esgf.constants import DEFAULT_ESGF_SEARCH
+from ocw.esgf.download import download
+from ocw.esgf.logon import logon
+
if sys.version_info[0] >= 3:
from urllib.error import HTTPError
else:
@@ -27,15 +44,6 @@
# might be around one day
from urllib2 import HTTPError
-from ocw.esgf.constants import DEFAULT_ESGF_SEARCH
-from ocw.esgf.download import download
-from ocw.esgf.logon import logon
-from ocw.esgf.search import SearchClient
-import ocw.data_source.local as local
-
-from bs4 import BeautifulSoup
-import requests
-
def load_dataset(dataset_id,
variable_name,
@@ -44,9 +52,8 @@
search_url=DEFAULT_ESGF_SEARCH,
elevation_index=0,
name='',
- save_path='/tmp',
- **additional_constraints):
- ''' Load an ESGF dataset.
+ save_path='/tmp'):
+ """ Load an ESGF dataset.
:param dataset_id: The ESGF ID of the dataset to load.
:type dataset_id: :mod:`string`
@@ -74,32 +81,24 @@
:param save_path: (Optional) Path to where downloaded files should be saved.
:type save_path: :mod:`string`
- :param additional_constraints: (Optional) Additional key,value pairs to
- pass as constraints to the search wrapper. These can be anything found
- on the ESGF metadata page for a dataset.
-
:returns: A :class:`list` of :class:`dataset.Dataset` contained the
requested dataset. If the dataset is stored in multiple files each will
be loaded into a separate :class:`dataset.Dataset`.
:raises ValueError: If no dataset can be found for the supplied ID and
variable, or if the requested dataset is a multi-file dataset.
- '''
- download_data = _get_file_download_data(url=search_url,
- dataset_id=dataset_id,
- variable=variable_name)
+ """
+ download_data = \
+ _get_file_download_data(url=search_url, dataset_id=dataset_id, variable=variable_name)
datasets = []
+
for url, var in download_data:
- _download_files([url],
- esgf_username,
- esgf_password,
- download_directory=save_path)
+ _download_files([url], esgf_username, esgf_password, download_directory=save_path)
file_save_path = os.path.join(save_path, url.split('/')[-1])
- datasets.append(local.load_file(file_save_path,
- var,
- name=name,
+
+ datasets.append(local.load_file(file_save_path, var, name=name,
elevation_index=elevation_index))
origin = {
@@ -107,19 +106,20 @@
'dataset_id': dataset_id,
'variable': variable_name
}
- for ds in datasets:
- ds.origin = origin
+
+ for dataset in datasets:
+ dataset.origin = origin
return datasets
def _get_file_download_data(dataset_id, variable, url=DEFAULT_ESGF_SEARCH):
- ''''''
+    """Return (download URL, variable name) pairs for the files that make up an ESGF dataset."""
url += '?type=File&dataset_id={}&variable={}'
url = url.format(dataset_id, variable)
- r = requests.get(url)
- xml = BeautifulSoup(r.content, "html.parser")
+ raw_data = requests.get(url)
+ xml = BeautifulSoup(raw_data.content, "html.parser")
dont_have_results = not bool(xml.response.result['numfound'])
@@ -141,7 +141,7 @@
def _download_files(file_urls, username, password, download_directory='/tmp'):
- ''''''
+    """Log on to ESGF and download each of the given file URLs into download_directory."""
try:
logon(username, password)
except HTTPError:
diff --git a/ocw/data_source/rcmed.py b/ocw/data_source/rcmed.py
index 0feb045..69d4628 100644
--- a/ocw/data_source/rcmed.py
+++ b/ocw/data_source/rcmed.py
@@ -339,9 +339,10 @@
parameters_metadata = get_parameters_metadata()
parameter_name, time_step, _, _, _, _, parameter_units = _get_parameter_info(
parameters_metadata, parameter_id)
- url = _generate_query_url(dataset_id, parameter_id, min_lat,
- max_lat, min_lon, max_lon, start_time, end_time, time_step)
- lats, lons, times, values = _get_data(url)
+
+ lats, lons, times, values = \
+ _coalesce_data(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon,
+ start_time, end_time, time_step)
unique_lats_lons_times = _make_unique(lats, lons, times)
unique_times = _calculate_time(unique_lats_lons_times[2], time_step)
@@ -362,3 +363,74 @@
units=parameter_units,
name=name,
origin=origin)
+
+
+def _coalesce_data(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon,
+ start_time, end_time, time_step):
+
+ """
+ Refer to this JIRA: https://issues.apache.org/jira/browse/CLIMATE-744
+
+ Sometimes RCMED does not seem to return the entire data set when the requested
+ range of data and / or number of data points are very large. This method breaks
+ the single large query into several smaller queries and then appends the results.
+
+ :param dataset_id: The RCMED dataset ID.
+ :param parameter_id: The parameter ID within the RCMED dataset.
+ :param min_lat: The minimum lat of the dataset boundary.
+ :param max_lat: The maximum lat of the dataset boundary.
+ :param min_lon: The minimum lon of the dataset boundary.
+ :param max_lon: The maximum lon of the dataset boundary.
+ :param start_time: The start datetime of the dataset boundary.
+ :param end_time: The end datetime of the dataset boundary.
+ :param time_step: The timestep to use when segmenting the datetime boundary.
+ :return: lats, lons, times, and values for the requested dataset / parameter from RCMED.
+ """
+
+ lats = None
+ lons = None
+ times = None
+ values = None
+
+    # Each query window spans at most `step` + 1 calendar years. This magic number strikes a
+    # balance between issuing an excessive number of small requests (e.g. a step of 1) and
+    # requesting so much data at once that RCMED does not send back the full data set.
+    step = 4
+
+ current_start = start_time
+ current_end = min(end_time, datetime(current_start.year + step, 12, 31))
+
+ while True:
+
+ url = _generate_query_url(dataset_id, parameter_id, min_lat,
+ max_lat, min_lon, max_lon, current_start, current_end, time_step)
+
+ tmp_lats, tmp_lons, tmp_times, tmp_values = _get_data(url)
+
+        lats = tmp_lats if lats is None else np.append(lats, tmp_lats)
+        lons = tmp_lons if lons is None else np.append(lons, tmp_lons)
+        times = tmp_times if times is None else np.append(times, tmp_times)
+        values = tmp_values if values is None else np.append(values, tmp_values)
+
+ if current_end == end_time:
+ break
+
+ current_start = datetime(current_end.year + 1, 1, 1)
+ current_end = min(end_time, datetime(current_start.year + step, 12, 31))
+
+ return lats, lons, times, values
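The windowing used by _coalesce_data can be illustrated on its own. A minimal standalone sketch of how the query boundaries advance, mirroring the step logic above:

    from datetime import datetime

    def year_chunks(start, end, step=4):
        # Yield (chunk_start, chunk_end) windows covering [start, end], each spanning
        # at most step + 1 calendar years.
        current_start = start
        while True:
            current_end = min(end, datetime(current_start.year + step, 12, 31))
            yield current_start, current_end
            if current_end == end:
                break
            current_start = datetime(current_end.year + 1, 1, 1)

    # list(year_chunks(datetime(1989, 1, 1), datetime(2000, 12, 31))) produces windows
    # 1989-01-01..1993-12-31, 1994-01-01..1998-12-31 and 1999-01-01..2000-12-31.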
diff --git a/ocw/dataset.py b/ocw/dataset.py
index 0a0e1a6..bb06443 100644
--- a/ocw/dataset.py
+++ b/ocw/dataset.py
@@ -25,14 +25,12 @@
'''
-import os
-import numpy
-import logging
import datetime as dt
-from mpl_toolkits.basemap import Basemap
-import netCDF4
+import logging
-import ocw
+import netCDF4
+import numpy
+
import ocw.utils as utils
logger = logging.getLogger(__name__)
@@ -235,7 +233,7 @@
class Bounds(object):
- '''Container for holding spatial and temporal bounds information.
+ """Container for holding spatial and temporal bounds information.
Certain operations require valid bounding information to be present for
correct functioning. Bounds guarantees that a function receives well
@@ -245,10 +243,11 @@
* 'rectangular'
* 'CORDEX (CORDEX region name)': pre-defined CORDEX boundary
* 'us_states': an array of US states abbreviation is required (ex) us_states = ['CA','NV'])
- * 'countries': an array of county names is required (ex) countries = ['United States','Canada','Mexico']
+    * 'countries': an array of country names is required (ex) countries = ['United States','Canada']
* 'user': user_mask_file in a netCDF format with two dimensional mask variable is required.
- If boundary_type == 'rectangular', spatial and temporal bounds must follow the following guidelines.
+ If boundary_type == 'rectangular', spatial and temporal bounds must follow the
+ following guidelines.
* Latitude values must be in the range [-90, 90]
* Longitude values must be in the range [-180, 180]
@@ -256,14 +255,15 @@
values.
Temporal bounds must a valid datetime object
- '''
+ """
def __init__(self, boundary_type='rectangular',
us_states=None, countries=None,
- user_mask_file=None, mask_variable_name=None, longitude_name=None, latitude_name=None,
+ user_mask_file=None, mask_variable_name=None,
+ longitude_name=None, latitude_name=None,
lat_min=-90, lat_max=90, lon_min=-180, lon_max=180,
start=None, end=None):
- '''Default Bounds constructor
+ """Default Bounds constructor
:param boundary_type: The type of spatial subset boundary.
:type boundary_type: :mod:`string`
@@ -291,89 +291,132 @@
:type end: :class:`datetime.datetime`
:raises: ValueError
- '''
- self.boundary_type = boundary_type
- if start:
- self._start = start
- else:
- self._start = None
+ """
- if end:
+ self.boundary_type = boundary_type
+
+ self._start = None
+ self._end = None
+ self.lat_min = None
+ self.lat_max = None
+ self.lon_min = None
+ self.lon_max = None
+
+ if start and self._validate_start(start):
+ self._start = start
+
+ if end and self._validate_end(end):
self._end = end
- else:
- self._end = None
if boundary_type == 'us_states':
- self.masked_regions = utils.shapefile_boundary(
- boundary_type, us_states)
+
+ self.masked_regions = utils.shapefile_boundary(boundary_type, us_states)
+
if boundary_type == 'countries':
- self.masked_regions = utils.shapefile_boundary(
- boundary_type, countries)
+
+ self.masked_regions = utils.shapefile_boundary(boundary_type, countries)
+
if boundary_type == 'user':
+
file_object = netCDF4.Dataset(user_mask_file)
self.mask_variable = file_object.variables[mask_variable_name][:]
mask_longitude = file_object.variables[longitude_name][:]
mask_latitude = file_object.variables[latitude_name][:]
if mask_longitude.ndim == 1 and mask_latitude.ndim == 1:
- self.mask_longitude, self.mask_latitude = numpy.meshgrid(
- mask_longitude, mask_latitude)
+ self.mask_longitude, self.mask_latitude = \
+ numpy.meshgrid(mask_longitude, mask_latitude)
elif mask_longitude.ndim == 2 and mask_latitude.ndim == 2:
self.mask_longitude = mask_longitude
self.mask_latitude = mask_latitude
- if boundary_type == 'rectangular':
- if not (-90 <= float(lat_min) <= 90) or float(lat_min) > float(lat_max):
- error = "Attempted to set lat_min to invalid value: %s" % (
- lat_min)
- logger.error(error)
- raise ValueError(error)
- if not (-90 <= float(lat_max) <= 90):
- error = "Attempted to set lat_max to invalid value: %s" % (
- lat_max)
- logger.error(error)
- raise ValueError(error)
- if not (-180 <= float(lon_min) <= 180) or float(lon_min) > float(lon_max):
- error = "Attempted to set lon_min to invalid value: %s" % (
- lon_min)
- logger.error(error)
- raise ValueError(error)
- if not (-180 <= float(lon_max) <= 180):
- error = "Attempted to set lat_max to invalid value: %s" % (
- lon_max)
- logger.error(error)
- raise ValueError(error)
- self.lat_min = float(lat_min)
- self.lat_max = float(lat_max)
- self.lon_min = float(lon_min)
- self.lon_max = float(lon_max)
+ if boundary_type == 'rectangular':
+
+ if self._validate_lat_lon(lat_max=lat_max, lat_min=lat_min, lon_max=lon_max, lon_min=lon_min):
+ self.lat_min = float(lat_min)
+ self.lat_max = float(lat_max)
+ self.lon_min = float(lon_min)
+ self.lon_max = float(lon_max)
+
if boundary_type[:6].upper() == 'CORDEX':
- self.lat_min, self.lat_max, self.lon_min, self.lon_max = utils.CORDEX_boundary(
- boundary_type[6:].replace(" ", "").lower())
+
+ lat_min, lat_max, lon_min, lon_max = \
+ utils.CORDEX_boundary(boundary_type[6:].replace(" ", "").lower())
+
+ if self._validate_lat_lon(lat_max=lat_max, lat_min=lat_min, lon_max=lon_max, lon_min=lon_min):
+ self.lat_min = float(lat_min)
+ self.lat_max = float(lat_max)
+ self.lon_min = float(lon_min)
+ self.lon_max = float(lon_max)
@property
def start(self):
+ """ Getter for start attribute. """
return self._start
@start.setter
def start(self, value):
- if self._end:
- if not (type(value) is dt.datetime and value < self._end):
- error = "Attempted to set start to invalid value: %s" % (value)
- logger.error(error)
- raise ValueError(error)
-
- self._start = value
+ """ Setter for start attribute. """
+ if value and self._validate_start(value):
+ self._start = value
@property
def end(self):
+ """ Getter for end attribute. """
return self._end
@end.setter
def end(self, value):
+ """ Setter for end attribute. """
+ if value and self._validate_end(value):
+ self._end = value
+
+ def _validate_start(self, value):
+ """ Validate start is both the correct type and less than end. """
+ if not isinstance(value, dt.datetime):
+ error = "Attempted to set start to invalid type: %s" % (type(value))
+ logger.error(error)
+ raise ValueError(error)
+
+ if self._end:
+ if value > self._end:
+ error = "Attempted to set start to invalid value: %s" % (value)
+ logger.error(error)
+ raise ValueError(error)
+
+ return True
+
+ def _validate_end(self, value):
+ """ Validate end is both the correct type and greater than start. """
+ if not isinstance(value, dt.datetime):
+ error = "Attempted to set end to invalid type: %s" % (type(value))
+ logger.error(error)
+ raise ValueError(error)
+
if self._start:
- if not (type(value) is dt.datetime and value > self._start):
+ if value < self._start:
error = "Attempted to set end to invalid value: %s" % (value)
logger.error(error)
raise ValueError(error)
- self._end = value
+ return True
+
+ def _validate_lat_lon(self, lat_max, lat_min, lon_max, lon_min):
+ """ Confirm the min / max lat / lon are within expected ranges. """
+ if not (-90 <= float(lat_min) <= 90) or float(lat_min) > float(lat_max):
+ error = "Attempted to set lat_min to invalid value: %s" % (lat_min)
+ logger.error(error)
+ raise ValueError(error)
+ if not -90 <= float(lat_max) <= 90:
+ error = "Attempted to set lat_max to invalid value: %s" % (lat_max)
+ logger.error(error)
+ raise ValueError(error)
+ if not (-180 <= float(lon_min) <= 180) or float(lon_min) > float(lon_max):
+ error = "Attempted to set lon_min to invalid value: %s" % (lon_min)
+ logger.error(error)
+ raise ValueError(error)
+ if not -180 <= float(lon_max) <= 180:
+            error = "Attempted to set lon_max to invalid value: %s" % (lon_max)
+ logger.error(error)
+ raise ValueError(error)
+
+ return True
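A brief usage sketch of the refactored validation, with illustrative values; the same checks now back both the constructor and the property setters:

    import datetime as dt
    from ocw.dataset import Bounds

    bounds = Bounds(lat_min=30.0, lat_max=50.0, lon_min=-130.0, lon_max=-100.0,
                    start=dt.datetime(1989, 1, 1), end=dt.datetime(1989, 12, 1))

    # Moving start past end is rejected by _validate_start and raises ValueError.
    try:
        bounds.start = dt.datetime(1995, 1, 1)
    except ValueError as err:
        print(err)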
diff --git a/ocw/esgf/constants.py b/ocw/esgf/constants.py
index 8d30848..90218fd 100644
--- a/ocw/esgf/constants.py
+++ b/ocw/esgf/constants.py
@@ -16,7 +16,7 @@
# specific language governing permissions and limitations
# under the License.
#
-'''Module containing constant parameters for ESGF RCMES integration.'''
+"""Module containing constant parameters for ESGF RCMES integration."""
# default location of ESGF user credentials
ESGF_CREDENTIALS = "~/.esg/credentials.pem"
diff --git a/ocw/esgf/download.py b/ocw/esgf/download.py
index 690915c..951a341 100644
--- a/ocw/esgf/download.py
+++ b/ocw/esgf/download.py
@@ -16,12 +16,18 @@
# specific language governing permissions and limitations
# under the License.
#
-'''
+"""
OCW module to download a file from ESGF.
-'''
+"""
+
+from __future__ import print_function
import sys
+from os.path import expanduser, join
+
+from ocw.esgf.constants import ESGF_CREDENTIALS
+
if sys.version_info[0] >= 3:
from http.client import HTTPSConnection
from urllib.request import build_opener
@@ -35,15 +41,12 @@
from urllib2 import build_opener
from urllib2 import HTTPCookieProcessor
from urllib2 import HTTPSHandler
-from os.path import expanduser, join
-
-from ocw.esgf.constants import ESGF_CREDENTIALS
class HTTPSClientAuthHandler(HTTPSHandler):
- '''
+ """
HTTP handler that transmits an X509 certificate as part of the request
- '''
+ """
def __init__(self, key, cert):
HTTPSHandler.__init__(self)
@@ -51,34 +54,44 @@
self.cert = cert
def https_open(self, req):
+ """
+ Opens the https connection.
+ :param req: The https request object.
+ :return: An addinfourl object for the request.
+ """
return self.do_open(self.getConnection, req)
def getConnection(self, host, timeout=300):
- return HTTPSConnection(host, key_file=self.key, cert_file=self.cert)
+ """
+ Create an HTTPSConnection object.
+ :param host: The ESGF server to connect to.
+ :param timeout: Connection timeout in seconds.
+        :return: An HTTPSConnection authenticated with the client key and certificate.
+ """
+ return HTTPSConnection(host, key_file=self.key, cert_file=self.cert, timeout=timeout)
def download(url, toDirectory="/tmp"):
- '''
+ """
Function to download a single file from ESGF.
-
:param url: the URL of the file to download
:param toDirectory: target directory where the file will be written
- '''
+ """
# setup HTTP handler
- certFile = expanduser(ESGF_CREDENTIALS)
- opener = build_opener(HTTPSClientAuthHandler(certFile, certFile))
+ cert_file = expanduser(ESGF_CREDENTIALS)
+ opener = build_opener(HTTPSClientAuthHandler(cert_file, cert_file))
opener.add_handler(HTTPCookieProcessor())
# download file
- localFilePath = join(toDirectory, url.split('/')[-1])
- print("\nDownloading url: %s to local path: %s ..." % (url, localFilePath))
- localFile = open(localFilePath, 'w')
- webFile = opener.open(url)
- localFile.write(webFile.read())
+ local_file_path = join(toDirectory, url.split('/')[-1])
+ print("\nDownloading url: %s to local path: %s ..." % (url, local_file_path))
+    local_file = open(local_file_path, 'wb')  # write as bytes; ESGF files are binary netCDF
+ web_file = opener.open(url)
+ local_file.write(web_file.read())
# cleanup
- localFile.close()
- webFile.close()
+ local_file.close()
+ web_file.close()
opener.close()
print("... done")
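For reference, a minimal sketch of calling the renamed helper; the URL is a placeholder and a valid ~/.esg/credentials.pem (obtained via logon()) is assumed:

    from ocw.esgf.download import download

    download('https://esgf-data.example.org/thredds/fileServer/sample/file.nc',
             toDirectory='/tmp')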
diff --git a/ocw/esgf/logon.py b/ocw/esgf/logon.py
index b792cfa..a49335d 100644
--- a/ocw/esgf/logon.py
+++ b/ocw/esgf/logon.py
@@ -16,28 +16,28 @@
# specific language governing permissions and limitations
# under the License.
#
-'''
+"""
RCMES module to logon onto the ESGF.
-'''
+"""
import os
from pyesgf.logon import LogonManager
-from ocw.esgf.constants import JPL_MYPROXY_SERVER_DN, JPL_HOSTNAME
+from ocw.esgf.constants import JPL_HOSTNAME, JPL_MYPROXY_SERVER_DN
def logon(openid, password):
- '''
+ """
Function to retrieve a short-term X.509 certificate that can be used to authenticate with ESGF.
The certificate is written in the location ~/.esg/credentials.pem.
The trusted CA certificates are written in the directory ~/.esg/certificates.
- '''
+ """
# Must configure the DN of the JPL MyProxy server if using a JPL openid
if JPL_HOSTNAME in openid:
os.environ['MYPROXY_SERVER_DN'] = JPL_MYPROXY_SERVER_DN
- lm = LogonManager()
+ logon_manager = LogonManager()
- lm.logon_with_openid(openid, password, bootstrap=True)
+ logon_manager.logon_with_openid(openid, password, bootstrap=True)
- return lm.is_logged_on()
+ return logon_manager.is_logged_on()
diff --git a/ocw/esgf/main.py b/ocw/esgf/main.py
index 5c90042..0fb4656 100644
--- a/ocw/esgf/main.py
+++ b/ocw/esgf/main.py
@@ -16,21 +16,23 @@
# specific language governing permissions and limitations
# under the License.
#
-'''
+"""
Example main program for ESGF-RCMES integration.
-
-'''
+
+"""
+
+from __future__ import print_function
+
+from ocw.esgf.download import download
+from ocw.esgf.logon import logon
+from ocw.esgf.search import SearchClient
# constant parameters
DATA_DIRECTORY = "/tmp"
-from ocw.esgf.logon import logon
-from ocw.esgf.search import SearchClient
-from ocw.esgf.download import download
-
def main():
- '''Example driver program'''
+ """Example driver program"""
username = raw_input('Enter your ESGF Username:\n')
password = raw_input('Enter your ESGF Password:\n')
@@ -42,8 +44,8 @@
print("...done.")
# step 2: execute faceted search for files
- urls = main_obs4mips()
- #urls = main_cmip5()
+ # urls = main_obs4mips()
+ urls = main_cmip5()
# step 3: download file(s)
for i, url in enumerate(urls):
@@ -53,66 +55,66 @@
def main_cmip5():
- '''
+ """
Example workflow to search for CMIP5 files
- '''
+ """
- searchClient = SearchClient(
+ search_client = SearchClient(
searchServiceUrl="http://pcmdi9.llnl.gov/esg-search/search", distrib=False)
- print('\nAvailable projects=%s' % searchClient.getFacets('project'))
- searchClient.setConstraint(project='CMIP5')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+ print('\nAvailable projects=%s' % search_client.getFacets('project'))
+ search_client.setConstraint(project='CMIP5')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
- print('\nAvailable models=%s' % searchClient.getFacets('model'))
- searchClient.setConstraint(model='INM-CM4')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+ print('\nAvailable models=%s' % search_client.getFacets('model'))
+ search_client.setConstraint(model='INM-CM4')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
- print('\nAvailable experiments=%s' % searchClient.getFacets('experiment'))
- searchClient.setConstraint(experiment='historical')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+ print('\nAvailable experiments=%s' % search_client.getFacets('experiment'))
+ search_client.setConstraint(experiment='historical')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
- print('\nAvailable time frequencies=%s' %
- searchClient.getFacets('time_frequency'))
- searchClient.setConstraint(time_frequency='mon')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+ print('\nAvailable time frequencies=%s' % search_client.getFacets('time_frequency'))
+ search_client.setConstraint(time_frequency='mon')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
- print('\nAvailable CF standard names=%s' %
- searchClient.getFacets('cf_standard_name'))
- searchClient.setConstraint(cf_standard_name='air_temperature')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+ print('\nAvailable CF standard names=%s' % search_client.getFacets('cf_standard_name'))
+ search_client.setConstraint(cf_standard_name='air_temperature')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
- urls = searchClient.getFiles()
+ urls = search_client.getFiles()
+
return urls
def main_obs4mips():
- '''
+ """
Example workflow to search for obs4MIPs files.
- '''
+ """
- searchClient = SearchClient(distrib=False)
+ search_client = SearchClient(distrib=False)
# obs4MIPs
- print('\nAvailable projects=%s' % searchClient.getFacets('project'))
- searchClient.setConstraint(project='obs4MIPs')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+ print('\nAvailable projects=%s' % search_client.getFacets('project'))
+ search_client.setConstraint(project='obs4MIPs')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
- print('\nAvailable variables=%s' % searchClient.getFacets('variable'))
- searchClient.setConstraint(variable='hus')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+ print('\nAvailable variables=%s' % search_client.getFacets('variable'))
+ search_client.setConstraint(variable='hus')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
- print('\nAvailable time frequencies=%s' %
- searchClient.getFacets('time_frequency'))
- searchClient.setConstraint(time_frequency='mon')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+ print('\nAvailable time frequencies=%s' % search_client.getFacets('time_frequency'))
+ search_client.setConstraint(time_frequency='mon')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
- print('\nAvailable models=%s' % searchClient.getFacets('model'))
- searchClient.setConstraint(model='Obs-MLS')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasetsi())
+ print('\nAvailable models=%s' % search_client.getFacets('model'))
+ search_client.setConstraint(model='Obs-MLS')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
- urls = searchClient.getFiles()
+ urls = search_client.getFiles()
+
return urls
+
if __name__ == '__main__':
main()
diff --git a/ocw/esgf/search.py b/ocw/esgf/search.py
index c2f4e12..a807c42 100644
--- a/ocw/esgf/search.py
+++ b/ocw/esgf/search.py
@@ -16,17 +16,19 @@
# specific language governing permissions and limitations
# under the License.
#
-'''
+"""
RCMES module to execute a faceted search for ESGF files.
-'''
+"""
+
+from __future__ import print_function
from pyesgf.search import SearchConnection
from ocw.esgf.constants import JPL_SEARCH_SERVICE_URL
-class SearchClient():
+class SearchClient(object):
"""
Simple ESGF search client for RCMES.
This class is a thin layer on top of the esgfpy-client package.
@@ -36,7 +38,7 @@
def __init__(self, searchServiceUrl=JPL_SEARCH_SERVICE_URL, distrib=True):
"""
:param searchServiceUrl: URL of ESGF search service to query
- :param distrib: True to execute a federation-wide search,
+ :param distrib: True to execute a federation-wide search,
False to search only the specified search service
"""
connection = SearchConnection(searchServiceUrl, distrib=distrib)
@@ -66,8 +68,10 @@
def getFacets(self, facet):
"""
- :return: a dictionary of (facet value, facet count) for the specified facet and current constraints.
- Example (for facet='project'): {u'COUND': 4, u'CMIP5': 2657, u'obs4MIPs': 7}
+ :return: a dictionary of (facet value, facet count) for the specified facet
+ and current constraints.
+
+ Example (for facet='project'): {u'COUND': 4, u'CMIP5': 2657, u'obs4MIPs': 7}
"""
return self.context.facet_counts[facet]
@@ -82,7 +86,7 @@
print("\nSearching files for dataset=%s with constraints: %s" %
(dataset.dataset_id, self.constraints))
files = dataset.file_context().search(**self.constraints)
- for file in files:
- print('Found file=%s' % file.download_url)
- urls.append(file.download_url)
+ for current_file in files:
+ print('Found file=%s' % current_file.download_url)
+ urls.append(current_file.download_url)
return urls