# blob: c5ed03af4c3581749d79fc777899b1c996ac300c [file] [log] [blame]
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
import urllib2
from ocw.esgf.constants import DEFAULT_ESGF_SEARCH
from ocw.esgf.download import download
from ocw.esgf.logon2 import logon2
from ocw.esgf.search import SearchClient
import ocw.data_source.local as local
from bs4 import BeautifulSoup
import requests
def load_dataset(dataset_id,
                 variable,
                 esgf_username,
                 esgf_password,
                 search_url=DEFAULT_ESGF_SEARCH,
                 elevation_index=0,
                 name='',
                 **additional_constraints):
    ''' Load an ESGF dataset.

    :param dataset_id: The ESGF ID of the dataset to load.
    :type dataset_id: :mod:`string`

    :param variable: The variable to load.
    :type variable: :mod:`string`

    :param esgf_username: ESGF OpenID value to use for authentication.
    :type esgf_username: :mod:`string`

    :param esgf_password: ESGF Password to use for authentication.
    :type esgf_password: :mod:`string`

    :param search_url: (Optional) The ESGF node to use for searching.
        Defaults to ``DEFAULT_ESGF_SEARCH`` from ``ocw.esgf.constants``.
    :type search_url: :mod:`string`

    :param elevation_index: (Optional) The elevation level to strip out when
        loading the dataset using ocw.data_source.local.
    :type elevation_index: :class:`int`

    :param name: (Optional) A name for the loaded dataset.
    :type name: :mod:`string`

    :param additional_constraints: (Optional) Additional key,value pairs
        intended as constraints for the search wrapper. NOTE: these are
        accepted for interface compatibility but are not currently forwarded
        to the search request made by this function.

    :returns: A :class:`list` of :class:`dataset.Dataset` containing the
        requested dataset. If the dataset is stored in multiple files each
        will be loaded into a separate :class:`dataset.Dataset`.

    :raises ValueError: If no dataset can be found for the supplied ID and
        variable, or if the supplied login credentials are rejected.
    '''
    # Materialise the (url, variable) pairs: they are walked twice below, once
    # to collect the URLs to download and once to load the downloaded files.
    download_data = list(_get_file_download_data(url=search_url,
                                                 dataset_id=dataset_id,
                                                 variable=variable))

    # Single source of truth for where files are downloaded to and loaded
    # from, so the two can never disagree.
    download_directory = '/tmp'

    # Fetch every file with one call so that authentication happens once for
    # the whole dataset instead of once per file. Skip the call entirely when
    # there is nothing to download so that no login is attempted needlessly.
    file_urls = [url for url, _ in download_data]
    if file_urls:
        _download_files(file_urls,
                        esgf_username,
                        esgf_password,
                        download_directory=download_directory)

    datasets = []
    for url, var in download_data:
        # Assumes each file is saved under the final path component of its
        # URL inside the download directory -- TODO confirm against
        # ocw.esgf.download.download.
        local_path = download_directory + '/' + url.split('/')[-1]
        datasets.append(local.load_file(local_path,
                                        var,
                                        name=name,
                                        elevation_index=elevation_index))

    return datasets
def _get_file_download_data(dataset_id, variable, url=DEFAULT_ESGF_SEARCH):
    ''' Look up the downloadable files for a dataset on an ESGF search node.

    Issues a ``type=File`` search restricted to ``dataset_id`` and
    ``variable`` and pairs each file URL in the response with the variable
    name reported alongside it.

    :param dataset_id: The ESGF ID of the dataset to search for.
    :type dataset_id: :mod:`string`

    :param variable: The variable name to restrict the search to.
    :type variable: :mod:`string`

    :param url: (Optional) Base URL of the ESGF search service to query.
    :type url: :mod:`string`

    :returns: A :class:`list` of ``(file_url, variable_name)`` tuples, one
        per matching entry in the search response.

    :raises ValueError: If the search response reports zero matching files.
    '''
    url += '?distrib=false&type=File&dataset_id={}&variable={}'
    url = url.format(dataset_id, variable)

    r = requests.get(url)
    xml = BeautifulSoup(r.content)

    # The result count is exposed as an attribute *string*. Any non-empty
    # string -- including "0" -- is truthy, so the count must be compared
    # numerically for the empty-result check to ever trigger.
    num_found = int(xml.response.result['numfound'])
    if num_found == 0:
        err = "esgf.load_dataset: No files found for specified dataset."
        raise ValueError(err)

    # Split out URLs for dataset download along with variable names for each
    # of those files.
    url_groups = xml.response.result.findAll('arr', {'name': 'url'})
    variable_groups = xml.response.result.findAll('arr', {'name': 'variable'})

    # Only the first <str> child of each group is used; for URLs, everything
    # from the first '|' onwards is discarded.
    urls = [group.findAll('str')[0].string.split('|')[0]
            for group in url_groups]
    variables = [group.findAll('str')[0].string
                 for group in variable_groups]

    return list(zip(urls, variables))
def _download_files(file_urls, username, password, download_directory='/tmp'):
    ''' Log on to ESGF and download each of the given files.

    :param file_urls: The URLs of the files to download.
    :param username: Value passed to ``logon2`` as the user identity.
    :param password: Value passed to ``logon2`` as the password.
    :param download_directory: (Optional) Directory handed to ``download``
        as the destination for every file.

    :raises ValueError: If ``logon2`` fails with a ``urllib2.HTTPError``.
    '''
    # Authenticate up front; an HTTP error here is reported to the caller as
    # bad credentials.
    try:
        logon2(username, password)
    except urllib2.HTTPError:
        raise ValueError('esgf._download_files: Invalid login credentials')

    for file_url in file_urls:
        download(file_url, toDirectory=download_directory)